tidy.c 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 7 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2018 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: John Coggeshall <john@php.net> |
  16. +----------------------------------------------------------------------+
  17. */
  18. #ifdef HAVE_CONFIG_H
  19. #include "config.h"
  20. #endif
  21. #include "php.h"
  22. #include "php_tidy.h"
  23. #if HAVE_TIDY
  24. #include "php_ini.h"
  25. #include "ext/standard/info.h"
  26. #if HAVE_TIDY_H
  27. #include "tidy.h"
  28. #elif HAVE_TIDYP_H
  29. #include "tidyp.h"
  30. #endif
  31. #if HAVE_TIDYBUFFIO_H
  32. #include "tidybuffio.h"
  33. #else
  34. #include "buffio.h"
  35. #endif
  36. /* compatibility with older versions of libtidy */
  37. #ifndef TIDY_CALL
  38. #define TIDY_CALL
  39. #endif
  40. /* {{{ ext/tidy macros
  41. */
  /* Overwrite the final byte of a non-empty buffer with NUL so that bp can be
   * read as a C string. A buffer whose size is 0 is left untouched. */
  #define FIX_BUFFER(bptr) \
      do { \
          if ((bptr)->size != 0) { \
              (bptr)->bp[(bptr)->size - 1] = '\0'; \
          } \
      } while (0)
  /* Declare `object` and bind it to the result of getThis(). */
  #define TIDY_SET_CONTEXT \
      zval *object = getThis();

  /* Declare `obj` and resolve it to the PHPTidyObj behind the call.
   *
   * - When getThis() yields an object, no parameters are accepted; on a parse
   *   failure the enclosing function returns without setting a value.
   * - Otherwise a single tidy_ce_doc instance ("O") is required as the
   *   argument; on a parse failure the enclosing function RETURN_FALSEs.
   *
   * The macro declares variables, so it must be used where declarations are
   * allowed. Its last body line deliberately carries no line continuation:
   * a trailing backslash would splice whatever line follows into this macro. */
  #define TIDY_FETCH_OBJECT \
      PHPTidyObj *obj; \
      TIDY_SET_CONTEXT; \
      if (object) { \
          if (zend_parse_parameters_none() == FAILURE) { \
              return; \
          } \
      } else { \
          if (zend_parse_method_parameters(ZEND_NUM_ARGS(), NULL, "O", &object, tidy_ce_doc) == FAILURE) { \
              RETURN_FALSE; \
          } \
      } \
      obj = Z_TIDY_P(object);
  /* TIDY_FETCH_OBJECT followed by a guard: when the document's `initialized`
   * flag is clear, throw an Error and return from the enclosing function. */
  #define TIDY_FETCH_INITIALIZED_OBJECT \
      TIDY_FETCH_OBJECT; \
      if (obj->ptdoc->initialized == 0) { \
          zend_throw_error(NULL, "tidy object is not initialized"); \
          return; \
      }
  /* Declare `obj` for a call that accepts no parameters: bind `object` via
   * TIDY_SET_CONTEXT, return from the enclosing function if any argument was
   * passed, then resolve `obj` from `object`.
   *
   * The last body line deliberately carries no line continuation: a trailing
   * backslash would splice whatever line follows into this macro. */
  #define TIDY_FETCH_ONLY_OBJECT \
      PHPTidyObj *obj; \
      TIDY_SET_CONTEXT; \
      if (zend_parse_parameters_none() == FAILURE) { \
          return; \
      } \
      obj = Z_TIDY_P(object);
  /* Apply a user-supplied configuration zval to a TidyDoc.
   *
   * A NULL _val is ignored. An array is handed to _php_tidy_apply_config_array.
   * Anything else is converted to a string and treated as a configuration file
   * path: it must pass TIDY_OPEN_BASE_DIR_CHECK (which makes the enclosing
   * function return FALSE on failure) and is then loaded with tidyLoadConfig,
   * whose -1 and 1 results are reported as a warning and a notice. */
  #define TIDY_APPLY_CONFIG_ZVAL(_doc, _val) \
      if (_val) { \
          if (Z_TYPE_P(_val) != IS_ARRAY) { \
              convert_to_string_ex(_val); \
              TIDY_OPEN_BASE_DIR_CHECK(Z_STRVAL_P(_val)); \
              switch (tidyLoadConfig(_doc, Z_STRVAL_P(_val))) { \
                  case -1: \
                      php_error_docref(NULL, E_WARNING, "Could not load configuration file '%s'", Z_STRVAL_P(_val)); \
                      break; \
                  case 1: \
                      php_error_docref(NULL, E_NOTICE, "There were errors while parsing the configuration file '%s'", Z_STRVAL_P(_val)); \
                      break; \
              } \
          } else { \
              _php_tidy_apply_config_array(_doc, Z_ARRVAL_P(_val)); \
          } \
      }
  /* Register one internal class.
   *
   *   classname   - identifier that is stringified into the class name
   *   name        - suffix selecting tidy_funcs_<name>, tidy_object_new_<name>,
   *                 tidy_ce_<name> and tidy_object_handlers_<name>
   *   parent      - parent class entry passed to the registration call
   *   class_flags - bits OR-ed into the new entry's ce_flags
   *
   * The class's handler table starts as a copy of std_object_handlers with
   * clone_obj cleared. */
  #define REGISTER_TIDY_CLASS(classname, name, parent, class_flags) \
      { \
          zend_class_entry ce; \
          INIT_CLASS_ENTRY(ce, #classname, tidy_funcs_##name); \
          ce.create_object = tidy_object_new_##name; \
          tidy_ce_##name = zend_register_internal_class_ex(&ce, parent); \
          tidy_ce_##name->ce_flags |= class_flags; \
          memcpy(&tidy_object_handlers_##name, &std_object_handlers, sizeof(zend_object_handlers)); \
          tidy_object_handlers_##name.clone_obj = NULL; \
      }
  /* Register the long constant TIDY_TAG_<tag> with the value TidyTag_<tag>. */
  #define TIDY_TAG_CONST(tag) \
      REGISTER_LONG_CONSTANT("TIDY_TAG_" #tag, TidyTag_##tag, CONST_CS | CONST_PERSISTENT)

  /* Register the long constant TIDY_NODETYPE_<name> with the value TidyNode_<type>. */
  #define TIDY_NODE_CONST(name, type) \
      REGISTER_LONG_CONSTANT("TIDY_NODETYPE_" #name, TidyNode_##type, CONST_CS | CONST_PERSISTENT)
  /* Fallback boolean constants for when no earlier header defined them. */
  #if !defined(TRUE)
  # define TRUE 1
  #endif

  #if !defined(FALSE)
  # define FALSE 0
  #endif
  /* Property helpers. Each one builds a temporary zval and stores it in the
   * hash table `_table` under the stringified `_key`, replacing any existing
   * entry. */

  /* String property; a NULL `_string` is stored as the empty string. */
  #define ADD_PROPERTY_STRING(_table, _key, _string) \
      { \
          zval tmp; \
          if (!(_string)) { \
              ZVAL_EMPTY_STRING(&tmp); \
          } else { \
              ZVAL_STRING(&tmp, (char *)_string); \
          } \
          zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
      }

  /* Length-delimited string property; a NULL `_string` is stored as the empty string. */
  #define ADD_PROPERTY_STRINGL(_table, _key, _string, _len) \
      { \
          zval tmp; \
          if (!(_string)) { \
              ZVAL_EMPTY_STRING(&tmp); \
          } else { \
              ZVAL_STRINGL(&tmp, (char *)_string, _len); \
          } \
          zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
      }

  /* Integer property. */
  #define ADD_PROPERTY_LONG(_table, _key, _long) \
      { \
          zval tmp; \
          ZVAL_LONG(&tmp, _long); \
          zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
      }

  /* NULL property. */
  #define ADD_PROPERTY_NULL(_table, _key) \
      { \
          zval tmp; \
          ZVAL_NULL(&tmp); \
          zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
      }

  /* Boolean property. */
  #define ADD_PROPERTY_BOOL(_table, _key, _bool) \
      { \
          zval tmp; \
          ZVAL_BOOL(&tmp, _bool); \
          zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
      }
  /* Make the enclosing function return FALSE when php_check_open_basedir()
   * reports a non-zero result for `filename`.
   *
   * The closing brace deliberately carries no line continuation: a trailing
   * backslash would splice whatever line follows into this macro. */
  #define TIDY_OPEN_BASE_DIR_CHECK(filename) \
      if (php_check_open_basedir(filename)) { \
          RETURN_FALSE; \
      }
  /* Load the configuration file named by the `default_config` module global
   * into `_doc`, if that setting is non-NULL and non-empty. A negative result
   * from tidyLoadConfig is reported as a warning; nothing else changes. */
  #define TIDY_SET_DEFAULT_CONFIG(_doc) \
      if (TG(default_config) && TG(default_config)[0] != '\0') { \
          if (tidyLoadConfig(_doc, TG(default_config)) < 0) { \
              php_error_docref(NULL, E_WARNING, "Unable to load Tidy configuration file at '%s'.", TG(default_config)); \
          } \
      }
  154. /* }}} */
  155. /* {{{ ext/tidy structs
  156. */
  /* Forward declarations; the structs are defined further down. */
  typedef struct _PHPTidyDoc PHPTidyDoc;
  typedef struct _PHPTidyObj PHPTidyObj;

  /* Which kind of PHP object a PHPTidyObj backs. */
  typedef enum {
      is_node = 0,    /* a node object */
      is_doc  = 1     /* a document object */
  } tidy_obj_type;
  /* Selector passed to php_tidy_create_node(). */
  typedef enum {
      is_root_node = 0,
      is_html_node = 1,
      is_head_node = 2,
      is_body_node = 3
  } tidy_base_nodetypes;
  169. struct _PHPTidyDoc {
  170. TidyDoc doc;
  171. TidyBuffer *errbuf;
  172. unsigned int ref_count;
  173. unsigned int initialized:1;
  174. };
  175. struct _PHPTidyObj {
  176. TidyNode node;
  177. tidy_obj_type type;
  178. PHPTidyDoc *ptdoc;
  179. zend_object std;
  180. };
  181. static inline PHPTidyObj *php_tidy_fetch_object(zend_object *obj) {
  182. return (PHPTidyObj *)((char*)(obj) - XtOffsetOf(PHPTidyObj, std));
  183. }
  184. #define Z_TIDY_P(zv) php_tidy_fetch_object(Z_OBJ_P((zv)))
  185. /* }}} */
  186. /* {{{ ext/tidy prototypes
  187. */
  188. static zend_string *php_tidy_file_to_mem(char *, zend_bool);
  189. static void tidy_object_free_storage(zend_object *);
  190. static zend_object *tidy_object_new_node(zend_class_entry *);
  191. static zend_object *tidy_object_new_doc(zend_class_entry *);
  192. static zval * tidy_instanciate(zend_class_entry *, zval *);
  193. static int tidy_doc_cast_handler(zval *, zval *, int);
  194. static int tidy_node_cast_handler(zval *, zval *, int);
  195. static void tidy_doc_update_properties(PHPTidyObj *);
  196. static void tidy_add_default_properties(PHPTidyObj *, tidy_obj_type);
  197. static void *php_tidy_get_opt_val(PHPTidyDoc *, TidyOption, TidyOptionType *);
  198. static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes);
  199. static int _php_tidy_set_tidy_opt(TidyDoc, char *, zval *);
  200. static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options);
  201. static void _php_tidy_register_nodetypes(INIT_FUNC_ARGS);
  202. static void _php_tidy_register_tags(INIT_FUNC_ARGS);
  203. static PHP_INI_MH(php_tidy_set_clean_output);
  204. static void php_tidy_clean_output_start(const char *name, size_t name_len);
  205. static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags);
  206. static int php_tidy_output_handler(void **nothing, php_output_context *output_context);
  207. static PHP_MINIT_FUNCTION(tidy);
  208. static PHP_MSHUTDOWN_FUNCTION(tidy);
  209. static PHP_RINIT_FUNCTION(tidy);
  210. static PHP_MINFO_FUNCTION(tidy);
  211. static PHP_FUNCTION(tidy_getopt);
  212. static PHP_FUNCTION(tidy_parse_string);
  213. static PHP_FUNCTION(tidy_parse_file);
  214. static PHP_FUNCTION(tidy_clean_repair);
  215. static PHP_FUNCTION(tidy_repair_string);
  216. static PHP_FUNCTION(tidy_repair_file);
  217. static PHP_FUNCTION(tidy_diagnose);
  218. static PHP_FUNCTION(tidy_get_output);
  219. static PHP_FUNCTION(tidy_get_error_buffer);
  220. static PHP_FUNCTION(tidy_get_release);
  221. static PHP_FUNCTION(tidy_get_config);
  222. static PHP_FUNCTION(tidy_get_status);
  223. static PHP_FUNCTION(tidy_get_html_ver);
  224. #if HAVE_TIDYOPTGETDOC
  225. static PHP_FUNCTION(tidy_get_opt_doc);
  226. #endif
  227. static PHP_FUNCTION(tidy_is_xhtml);
  228. static PHP_FUNCTION(tidy_is_xml);
  229. static PHP_FUNCTION(tidy_error_count);
  230. static PHP_FUNCTION(tidy_warning_count);
  231. static PHP_FUNCTION(tidy_access_count);
  232. static PHP_FUNCTION(tidy_config_count);
  233. static PHP_FUNCTION(tidy_get_root);
  234. static PHP_FUNCTION(tidy_get_html);
  235. static PHP_FUNCTION(tidy_get_head);
  236. static PHP_FUNCTION(tidy_get_body);
  237. static TIDY_DOC_METHOD(__construct);
  238. static TIDY_DOC_METHOD(parseFile);
  239. static TIDY_DOC_METHOD(parseString);
  240. static TIDY_NODE_METHOD(hasChildren);
  241. static TIDY_NODE_METHOD(hasSiblings);
  242. static TIDY_NODE_METHOD(isComment);
  243. static TIDY_NODE_METHOD(isHtml);
  244. static TIDY_NODE_METHOD(isText);
  245. static TIDY_NODE_METHOD(isJste);
  246. static TIDY_NODE_METHOD(isAsp);
  247. static TIDY_NODE_METHOD(isPhp);
  248. static TIDY_NODE_METHOD(getParent);
  249. static TIDY_NODE_METHOD(__construct);
  250. /* }}} */
  251. ZEND_DECLARE_MODULE_GLOBALS(tidy)
  252. PHP_INI_BEGIN()
  253. STD_PHP_INI_ENTRY("tidy.default_config", "", PHP_INI_SYSTEM, OnUpdateString, default_config, zend_tidy_globals, tidy_globals)
  254. STD_PHP_INI_ENTRY("tidy.clean_output", "0", PHP_INI_USER, php_tidy_set_clean_output, clean_output, zend_tidy_globals, tidy_globals)
  255. PHP_INI_END()
  256. /* {{{ arginfo */
  257. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_parse_string, 0, 0, 1)
  258. ZEND_ARG_INFO(0, input)
  259. ZEND_ARG_INFO(0, config_options)
  260. ZEND_ARG_INFO(0, encoding)
  261. ZEND_END_ARG_INFO()
  262. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_error_buffer, 0, 0, 1)
  263. ZEND_ARG_INFO(0, object)
  264. ZEND_END_ARG_INFO()
  265. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_output, 0, 0, 1)
  266. ZEND_ARG_INFO(0, object)
  267. ZEND_END_ARG_INFO()
  268. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_parse_file, 0, 0, 1)
  269. ZEND_ARG_INFO(0, file)
  270. ZEND_ARG_INFO(0, config_options)
  271. ZEND_ARG_INFO(0, encoding)
  272. ZEND_ARG_INFO(0, use_include_path)
  273. ZEND_END_ARG_INFO()
  274. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_clean_repair, 0, 0, 1)
  275. ZEND_ARG_INFO(0, object)
  276. ZEND_END_ARG_INFO()
  277. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_repair_string, 0, 0, 1)
  278. ZEND_ARG_INFO(0, data)
  279. ZEND_ARG_INFO(0, config_file)
  280. ZEND_ARG_INFO(0, encoding)
  281. ZEND_END_ARG_INFO()
  282. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_repair_file, 0, 0, 1)
  283. ZEND_ARG_INFO(0, filename)
  284. ZEND_ARG_INFO(0, config_file)
  285. ZEND_ARG_INFO(0, encoding)
  286. ZEND_ARG_INFO(0, use_include_path)
  287. ZEND_END_ARG_INFO()
  288. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_diagnose, 0, 0, 1)
  289. ZEND_ARG_INFO(0, object)
  290. ZEND_END_ARG_INFO()
  291. ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_release, 0)
  292. ZEND_END_ARG_INFO()
  293. #if HAVE_TIDYOPTGETDOC
  294. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_opt_doc, 0, 0, 2)
  295. ZEND_ARG_INFO(0, resource)
  296. ZEND_ARG_INFO(0, optname)
  297. ZEND_END_ARG_INFO()
  298. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_opt_doc_method, 0, 0, 1)
  299. ZEND_ARG_INFO(0, optname)
  300. ZEND_END_ARG_INFO()
  301. #endif
  302. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_config, 0, 0, 1)
  303. ZEND_ARG_INFO(0, object)
  304. ZEND_END_ARG_INFO()
  305. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_status, 0, 0, 1)
  306. ZEND_ARG_INFO(0, object)
  307. ZEND_END_ARG_INFO()
  308. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_html_ver, 0, 0, 1)
  309. ZEND_ARG_INFO(0, object)
  310. ZEND_END_ARG_INFO()
  311. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_is_xhtml, 0, 0, 1)
  312. ZEND_ARG_INFO(0, object)
  313. ZEND_END_ARG_INFO()
  314. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_is_xml, 0, 0, 1)
  315. ZEND_ARG_INFO(0, object)
  316. ZEND_END_ARG_INFO()
  317. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_error_count, 0, 0, 1)
  318. ZEND_ARG_INFO(0, object)
  319. ZEND_END_ARG_INFO()
  320. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_warning_count, 0, 0, 1)
  321. ZEND_ARG_INFO(0, object)
  322. ZEND_END_ARG_INFO()
  323. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_access_count, 0, 0, 1)
  324. ZEND_ARG_INFO(0, object)
  325. ZEND_END_ARG_INFO()
  326. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_config_count, 0, 0, 1)
  327. ZEND_ARG_INFO(0, object)
  328. ZEND_END_ARG_INFO()
  329. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_getopt, 0, 0, 1)
  330. ZEND_ARG_INFO(0, option)
  331. ZEND_END_ARG_INFO()
  332. ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_root, 0)
  333. ZEND_END_ARG_INFO()
  334. ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_html, 0)
  335. ZEND_END_ARG_INFO()
  336. ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_head, 0)
  337. ZEND_END_ARG_INFO()
  338. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_body, 0, 0, 1)
  339. ZEND_ARG_INFO(0, tidy)
  340. ZEND_END_ARG_INFO()
  341. ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_construct, 0, 0, 0)
  342. ZEND_ARG_INFO(0, filename)
  343. ZEND_ARG_INFO(0, config_file)
  344. ZEND_ARG_INFO(0, encoding)
  345. ZEND_ARG_INFO(0, use_include_path)
  346. ZEND_END_ARG_INFO()
  347. /* }}} */
  348. static const zend_function_entry tidy_functions[] = {
  349. PHP_FE(tidy_getopt, arginfo_tidy_getopt)
  350. PHP_FE(tidy_parse_string, arginfo_tidy_parse_string)
  351. PHP_FE(tidy_parse_file, arginfo_tidy_parse_file)
  352. PHP_FE(tidy_get_output, arginfo_tidy_get_output)
  353. PHP_FE(tidy_get_error_buffer, arginfo_tidy_get_error_buffer)
  354. PHP_FE(tidy_clean_repair, arginfo_tidy_clean_repair)
  355. PHP_FE(tidy_repair_string, arginfo_tidy_repair_string)
  356. PHP_FE(tidy_repair_file, arginfo_tidy_repair_file)
  357. PHP_FE(tidy_diagnose, arginfo_tidy_diagnose)
  358. PHP_FE(tidy_get_release, arginfo_tidy_get_release)
  359. PHP_FE(tidy_get_config, arginfo_tidy_get_config)
  360. PHP_FE(tidy_get_status, arginfo_tidy_get_status)
  361. PHP_FE(tidy_get_html_ver, arginfo_tidy_get_html_ver)
  362. PHP_FE(tidy_is_xhtml, arginfo_tidy_is_xhtml)
  363. PHP_FE(tidy_is_xml, arginfo_tidy_is_xml)
  364. PHP_FE(tidy_error_count, arginfo_tidy_error_count)
  365. PHP_FE(tidy_warning_count, arginfo_tidy_warning_count)
  366. PHP_FE(tidy_access_count, arginfo_tidy_access_count)
  367. PHP_FE(tidy_config_count, arginfo_tidy_config_count)
  368. #if HAVE_TIDYOPTGETDOC
  369. PHP_FE(tidy_get_opt_doc, arginfo_tidy_get_opt_doc)
  370. #endif
  371. PHP_FE(tidy_get_root, arginfo_tidy_get_root)
  372. PHP_FE(tidy_get_head, arginfo_tidy_get_head)
  373. PHP_FE(tidy_get_html, arginfo_tidy_get_html)
  374. PHP_FE(tidy_get_body, arginfo_tidy_get_body)
  375. PHP_FE_END
  376. };
  377. static const zend_function_entry tidy_funcs_doc[] = {
  378. TIDY_METHOD_MAP(getOpt, tidy_getopt, arginfo_tidy_getopt)
  379. TIDY_METHOD_MAP(cleanRepair, tidy_clean_repair, NULL)
  380. TIDY_DOC_ME(parseFile, arginfo_tidy_parse_file)
  381. TIDY_DOC_ME(parseString, arginfo_tidy_parse_string)
  382. TIDY_METHOD_MAP(repairString, tidy_repair_string, arginfo_tidy_repair_string)
  383. TIDY_METHOD_MAP(repairFile, tidy_repair_file, arginfo_tidy_repair_file)
  384. TIDY_METHOD_MAP(diagnose, tidy_diagnose, NULL)
  385. TIDY_METHOD_MAP(getRelease, tidy_get_release, NULL)
  386. TIDY_METHOD_MAP(getConfig, tidy_get_config, NULL)
  387. TIDY_METHOD_MAP(getStatus, tidy_get_status, NULL)
  388. TIDY_METHOD_MAP(getHtmlVer, tidy_get_html_ver, NULL)
  389. #if HAVE_TIDYOPTGETDOC
  390. TIDY_METHOD_MAP(getOptDoc, tidy_get_opt_doc, arginfo_tidy_get_opt_doc_method)
  391. #endif
  392. TIDY_METHOD_MAP(isXhtml, tidy_is_xhtml, NULL)
  393. TIDY_METHOD_MAP(isXml, tidy_is_xml, NULL)
  394. TIDY_METHOD_MAP(root, tidy_get_root, NULL)
  395. TIDY_METHOD_MAP(head, tidy_get_head, NULL)
  396. TIDY_METHOD_MAP(html, tidy_get_html, NULL)
  397. TIDY_METHOD_MAP(body, tidy_get_body, NULL)
  398. TIDY_DOC_ME(__construct, arginfo_tidy_construct)
  399. PHP_FE_END
  400. };
  401. static const zend_function_entry tidy_funcs_node[] = {
  402. TIDY_NODE_ME(hasChildren, NULL)
  403. TIDY_NODE_ME(hasSiblings, NULL)
  404. TIDY_NODE_ME(isComment, NULL)
  405. TIDY_NODE_ME(isHtml, NULL)
  406. TIDY_NODE_ME(isText, NULL)
  407. TIDY_NODE_ME(isJste, NULL)
  408. TIDY_NODE_ME(isAsp, NULL)
  409. TIDY_NODE_ME(isPhp, NULL)
  410. TIDY_NODE_ME(getParent, NULL)
  411. TIDY_NODE_PRIVATE_ME(__construct, NULL)
  412. PHP_FE_END
  413. };
  414. static zend_class_entry *tidy_ce_doc, *tidy_ce_node;
  415. static zend_object_handlers tidy_object_handlers_doc;
  416. static zend_object_handlers tidy_object_handlers_node;
  417. zend_module_entry tidy_module_entry = {
  418. STANDARD_MODULE_HEADER,
  419. "tidy",
  420. tidy_functions,
  421. PHP_MINIT(tidy),
  422. PHP_MSHUTDOWN(tidy),
  423. PHP_RINIT(tidy),
  424. NULL,
  425. PHP_MINFO(tidy),
  426. PHP_TIDY_VERSION,
  427. PHP_MODULE_GLOBALS(tidy),
  428. NULL,
  429. NULL,
  430. NULL,
  431. STANDARD_MODULE_PROPERTIES_EX
  432. };
  /* Emitted only when COMPILE_DL_TIDY is defined; the TSRMLS cache definition
   * is added on top of that only when ZTS is defined. */
  #if defined(COMPILE_DL_TIDY)
  # if defined(ZTS)
  ZEND_TSRMLS_CACHE_DEFINE()
  # endif
  ZEND_GET_MODULE(tidy)
  #endif
  439. static void* TIDY_CALL php_tidy_malloc(size_t len)
  440. {
  441. return emalloc(len);
  442. }
  443. static void* TIDY_CALL php_tidy_realloc(void *buf, size_t len)
  444. {
  445. return erealloc(buf, len);
  446. }
  447. static void TIDY_CALL php_tidy_free(void *buf)
  448. {
  449. efree(buf);
  450. }
  451. static void TIDY_CALL php_tidy_panic(ctmbstr msg)
  452. {
  453. php_error_docref(NULL, E_ERROR, "Could not allocate memory for tidy! (Reason: %s)", (char *)msg);
  454. }
  455. static int _php_tidy_set_tidy_opt(TidyDoc doc, char *optname, zval *value)
  456. {
  457. TidyOption opt = tidyGetOptionByName(doc, optname);
  458. zend_string *str, *tmp_str;
  459. zend_long lval;
  460. if (!opt) {
  461. php_error_docref(NULL, E_NOTICE, "Unknown Tidy Configuration Option '%s'", optname);
  462. return FAILURE;
  463. }
  464. if (tidyOptIsReadOnly(opt)) {
  465. php_error_docref(NULL, E_NOTICE, "Attempting to set read-only option '%s'", optname);
  466. return FAILURE;
  467. }
  468. switch(tidyOptGetType(opt)) {
  469. case TidyString:
  470. str = zval_get_tmp_string(value, &tmp_str);
  471. if (tidyOptSetValue(doc, tidyOptGetId(opt), ZSTR_VAL(str))) {
  472. zend_tmp_string_release(tmp_str);
  473. return SUCCESS;
  474. }
  475. zend_tmp_string_release(tmp_str);
  476. break;
  477. case TidyInteger:
  478. lval = zval_get_long(value);
  479. if (tidyOptSetInt(doc, tidyOptGetId(opt), lval)) {
  480. return SUCCESS;
  481. }
  482. break;
  483. case TidyBoolean:
  484. lval = zval_get_long(value);
  485. if (tidyOptSetBool(doc, tidyOptGetId(opt), lval)) {
  486. return SUCCESS;
  487. }
  488. break;
  489. default:
  490. php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
  491. break;
  492. }
  493. return FAILURE;
  494. }
  495. static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, zend_bool is_file)
  496. {
  497. char *enc = NULL;
  498. size_t enc_len = 0;
  499. zend_bool use_include_path = 0;
  500. TidyDoc doc;
  501. TidyBuffer *errbuf;
  502. zend_string *data, *arg1;
  503. zval *config = NULL;
  504. if (is_file) {
  505. if (zend_parse_parameters(ZEND_NUM_ARGS(), "P|zsb", &arg1, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
  506. RETURN_FALSE;
  507. }
  508. if (!(data = php_tidy_file_to_mem(ZSTR_VAL(arg1), use_include_path))) {
  509. RETURN_FALSE;
  510. }
  511. } else {
  512. if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|zsb", &arg1, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
  513. RETURN_FALSE;
  514. }
  515. data = arg1;
  516. }
  517. if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(data))) {
  518. php_error_docref(NULL, E_WARNING, "Input string is too long");
  519. RETURN_FALSE;
  520. }
  521. doc = tidyCreate();
  522. errbuf = emalloc(sizeof(TidyBuffer));
  523. tidyBufInit(errbuf);
  524. if (tidySetErrorBuffer(doc, errbuf) != 0) {
  525. tidyBufFree(errbuf);
  526. efree(errbuf);
  527. tidyRelease(doc);
  528. php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
  529. }
  530. tidyOptSetBool(doc, TidyForceOutput, yes);
  531. tidyOptSetBool(doc, TidyMark, no);
  532. TIDY_SET_DEFAULT_CONFIG(doc);
  533. if (config) {
  534. TIDY_APPLY_CONFIG_ZVAL(doc, config);
  535. }
  536. if(enc_len) {
  537. if (tidySetCharEncoding(doc, enc) < 0) {
  538. php_error_docref(NULL, E_WARNING, "Could not set encoding '%s'", enc);
  539. RETVAL_FALSE;
  540. }
  541. }
  542. if (data) {
  543. TidyBuffer buf;
  544. tidyBufInit(&buf);
  545. tidyBufAttach(&buf, (byte *) ZSTR_VAL(data), (uint32_t)ZSTR_LEN(data));
  546. if (tidyParseBuffer(doc, &buf) < 0) {
  547. php_error_docref(NULL, E_WARNING, "%s", errbuf->bp);
  548. RETVAL_FALSE;
  549. } else {
  550. if (tidyCleanAndRepair(doc) >= 0) {
  551. TidyBuffer output;
  552. tidyBufInit(&output);
  553. tidySaveBuffer (doc, &output);
  554. FIX_BUFFER(&output);
  555. RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
  556. tidyBufFree(&output);
  557. } else {
  558. RETVAL_FALSE;
  559. }
  560. }
  561. }
  562. if (is_file) {
  563. zend_string_release_ex(data, 0);
  564. }
  565. tidyBufFree(errbuf);
  566. efree(errbuf);
  567. tidyRelease(doc);
  568. }
  569. static zend_string *php_tidy_file_to_mem(char *filename, zend_bool use_include_path)
  570. {
  571. php_stream *stream;
  572. zend_string *data = NULL;
  573. if (!(stream = php_stream_open_wrapper(filename, "rb", (use_include_path ? USE_PATH : 0), NULL))) {
  574. return NULL;
  575. }
  576. if ((data = php_stream_copy_to_mem(stream, PHP_STREAM_COPY_ALL, 0)) == NULL) {
  577. data = ZSTR_EMPTY_ALLOC();
  578. }
  579. php_stream_close(stream);
  580. return data;
  581. }
  582. static void tidy_object_free_storage(zend_object *object)
  583. {
  584. PHPTidyObj *intern = php_tidy_fetch_object(object);
  585. zend_object_std_dtor(&intern->std);
  586. if (intern->ptdoc) {
  587. intern->ptdoc->ref_count--;
  588. if (intern->ptdoc->ref_count <= 0) {
  589. tidyBufFree(intern->ptdoc->errbuf);
  590. efree(intern->ptdoc->errbuf);
  591. tidyRelease(intern->ptdoc->doc);
  592. efree(intern->ptdoc);
  593. }
  594. }
  595. }
  596. static zend_object *tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers, tidy_obj_type objtype)
  597. {
  598. PHPTidyObj *intern;
  599. intern = zend_object_alloc(sizeof(PHPTidyObj), class_type);
  600. zend_object_std_init(&intern->std, class_type);
  601. object_properties_init(&intern->std, class_type);
  602. switch(objtype) {
  603. case is_node:
  604. break;
  605. case is_doc:
  606. intern->ptdoc = emalloc(sizeof(PHPTidyDoc));
  607. intern->ptdoc->doc = tidyCreate();
  608. intern->ptdoc->ref_count = 1;
  609. intern->ptdoc->initialized = 0;
  610. intern->ptdoc->errbuf = emalloc(sizeof(TidyBuffer));
  611. tidyBufInit(intern->ptdoc->errbuf);
  612. if (tidySetErrorBuffer(intern->ptdoc->doc, intern->ptdoc->errbuf) != 0) {
  613. tidyBufFree(intern->ptdoc->errbuf);
  614. efree(intern->ptdoc->errbuf);
  615. tidyRelease(intern->ptdoc->doc);
  616. efree(intern->ptdoc);
  617. efree(intern);
  618. php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
  619. }
  620. tidyOptSetBool(intern->ptdoc->doc, TidyForceOutput, yes);
  621. tidyOptSetBool(intern->ptdoc->doc, TidyMark, no);
  622. TIDY_SET_DEFAULT_CONFIG(intern->ptdoc->doc);
  623. tidy_add_default_properties(intern, is_doc);
  624. break;
  625. }
  626. intern->std.handlers = handlers;
  627. return &intern->std;
  628. }
  629. static zend_object *tidy_object_new_node(zend_class_entry *class_type)
  630. {
  631. return tidy_object_new(class_type, &tidy_object_handlers_node, is_node);
  632. }
  633. static zend_object *tidy_object_new_doc(zend_class_entry *class_type)
  634. {
  635. return tidy_object_new(class_type, &tidy_object_handlers_doc, is_doc);
  636. }
  637. static zval * tidy_instanciate(zend_class_entry *pce, zval *object)
  638. {
  639. object_init_ex(object, pce);
  640. return object;
  641. }
  642. static int tidy_doc_cast_handler(zval *in, zval *out, int type)
  643. {
  644. TidyBuffer output;
  645. PHPTidyObj *obj;
  646. switch (type) {
  647. case IS_LONG:
  648. case _IS_NUMBER:
  649. ZVAL_LONG(out, 0);
  650. break;
  651. case IS_DOUBLE:
  652. ZVAL_DOUBLE(out, 0);
  653. break;
  654. case _IS_BOOL:
  655. ZVAL_TRUE(out);
  656. break;
  657. case IS_STRING:
  658. obj = Z_TIDY_P(in);
  659. tidyBufInit(&output);
  660. tidySaveBuffer (obj->ptdoc->doc, &output);
  661. ZVAL_STRINGL(out, (char *) output.bp, output.size ? output.size-1 : 0);
  662. tidyBufFree(&output);
  663. break;
  664. default:
  665. return FAILURE;
  666. }
  667. return SUCCESS;
  668. }
  669. static int tidy_node_cast_handler(zval *in, zval *out, int type)
  670. {
  671. TidyBuffer buf;
  672. PHPTidyObj *obj;
  673. switch(type) {
  674. case IS_LONG:
  675. case _IS_NUMBER:
  676. ZVAL_LONG(out, 0);
  677. break;
  678. case IS_DOUBLE:
  679. ZVAL_DOUBLE(out, 0);
  680. break;
  681. case _IS_BOOL:
  682. ZVAL_TRUE(out);
  683. break;
  684. case IS_STRING:
  685. obj = Z_TIDY_P(in);
  686. tidyBufInit(&buf);
  687. if (obj->ptdoc) {
  688. tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
  689. ZVAL_STRINGL(out, (char *) buf.bp, buf.size-1);
  690. } else {
  691. ZVAL_EMPTY_STRING(out);
  692. }
  693. tidyBufFree(&buf);
  694. break;
  695. default:
  696. return FAILURE;
  697. }
  698. return SUCCESS;
  699. }
  700. static void tidy_doc_update_properties(PHPTidyObj *obj)
  701. {
  702. TidyBuffer output;
  703. zval temp;
  704. tidyBufInit(&output);
  705. tidySaveBuffer (obj->ptdoc->doc, &output);
  706. if (output.size) {
  707. if (!obj->std.properties) {
  708. rebuild_object_properties(&obj->std);
  709. }
  710. ZVAL_STRINGL(&temp, (char*)output.bp, output.size-1);
  711. zend_hash_str_update(obj->std.properties, "value", sizeof("value") - 1, &temp);
  712. }
  713. tidyBufFree(&output);
  714. if (obj->ptdoc->errbuf->size) {
  715. if (!obj->std.properties) {
  716. rebuild_object_properties(&obj->std);
  717. }
  718. ZVAL_STRINGL(&temp, (char*)obj->ptdoc->errbuf->bp, obj->ptdoc->errbuf->size-1);
  719. zend_hash_str_update(obj->std.properties, "errorBuffer", sizeof("errorBuffer") - 1, &temp);
  720. }
  721. }
  722. static void tidy_add_default_properties(PHPTidyObj *obj, tidy_obj_type type)
  723. {
  724. TidyBuffer buf;
  725. TidyAttr tempattr;
  726. TidyNode tempnode;
  727. zval attribute, children, temp;
  728. PHPTidyObj *newobj;
  729. switch(type) {
  730. case is_node:
  731. if (!obj->std.properties) {
  732. rebuild_object_properties(&obj->std);
  733. }
  734. tidyBufInit(&buf);
  735. tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
  736. ADD_PROPERTY_STRINGL(obj->std.properties, value, buf.bp, buf.size ? buf.size-1 : 0);
  737. tidyBufFree(&buf);
  738. ADD_PROPERTY_STRING(obj->std.properties, name, tidyNodeGetName(obj->node));
  739. ADD_PROPERTY_LONG(obj->std.properties, type, tidyNodeGetType(obj->node));
  740. ADD_PROPERTY_LONG(obj->std.properties, line, tidyNodeLine(obj->node));
  741. ADD_PROPERTY_LONG(obj->std.properties, column, tidyNodeColumn(obj->node));
  742. ADD_PROPERTY_BOOL(obj->std.properties, proprietary, tidyNodeIsProp(obj->ptdoc->doc, obj->node));
  743. switch(tidyNodeGetType(obj->node)) {
  744. case TidyNode_Root:
  745. case TidyNode_DocType:
  746. case TidyNode_Text:
  747. case TidyNode_Comment:
  748. break;
  749. default:
  750. ADD_PROPERTY_LONG(obj->std.properties, id, tidyNodeGetId(obj->node));
  751. }
  752. tempattr = tidyAttrFirst(obj->node);
  753. if (tempattr) {
  754. char *name, *val;
  755. array_init(&attribute);
  756. do {
  757. name = (char *)tidyAttrName(tempattr);
  758. val = (char *)tidyAttrValue(tempattr);
  759. if (name && val) {
  760. add_assoc_string(&attribute, name, val);
  761. }
  762. } while((tempattr = tidyAttrNext(tempattr)));
  763. } else {
  764. ZVAL_NULL(&attribute);
  765. }
  766. zend_hash_str_update(obj->std.properties, "attribute", sizeof("attribute") - 1, &attribute);
  767. tempnode = tidyGetChild(obj->node);
  768. if (tempnode) {
  769. array_init(&children);
  770. do {
  771. tidy_instanciate(tidy_ce_node, &temp);
  772. newobj = Z_TIDY_P(&temp);
  773. newobj->node = tempnode;
  774. newobj->type = is_node;
  775. newobj->ptdoc = obj->ptdoc;
  776. newobj->ptdoc->ref_count++;
  777. tidy_add_default_properties(newobj, is_node);
  778. add_next_index_zval(&children, &temp);
  779. } while((tempnode = tidyGetNext(tempnode)));
  780. } else {
  781. ZVAL_NULL(&children);
  782. }
  783. zend_hash_str_update(obj->std.properties, "child", sizeof("child") - 1, &children);
  784. break;
  785. case is_doc:
  786. if (!obj->std.properties) {
  787. rebuild_object_properties(&obj->std);
  788. }
  789. ADD_PROPERTY_NULL(obj->std.properties, errorBuffer);
  790. ADD_PROPERTY_NULL(obj->std.properties, value);
  791. break;
  792. }
  793. }
  794. static void *php_tidy_get_opt_val(PHPTidyDoc *ptdoc, TidyOption opt, TidyOptionType *type)
  795. {
  796. *type = tidyOptGetType(opt);
  797. switch (*type) {
  798. case TidyString: {
  799. char *val = (char *) tidyOptGetValue(ptdoc->doc, tidyOptGetId(opt));
  800. if (val) {
  801. return (void *) zend_string_init(val, strlen(val), 0);
  802. } else {
  803. return (void *) ZSTR_EMPTY_ALLOC();
  804. }
  805. }
  806. break;
  807. case TidyInteger:
  808. return (void *) (uintptr_t) tidyOptGetInt(ptdoc->doc, tidyOptGetId(opt));
  809. break;
  810. case TidyBoolean:
  811. return (void *) tidyOptGetBool(ptdoc->doc, tidyOptGetId(opt));
  812. break;
  813. }
  814. /* should not happen */
  815. return NULL;
  816. }
  817. static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes node_type)
  818. {
  819. PHPTidyObj *newobj;
  820. TidyNode node;
  821. TIDY_FETCH_OBJECT;
  822. switch (node_type) {
  823. case is_root_node:
  824. node = tidyGetRoot(obj->ptdoc->doc);
  825. break;
  826. case is_html_node:
  827. node = tidyGetHtml(obj->ptdoc->doc);
  828. break;
  829. case is_head_node:
  830. node = tidyGetHead(obj->ptdoc->doc);
  831. break;
  832. case is_body_node:
  833. node = tidyGetBody(obj->ptdoc->doc);
  834. break;
  835. default:
  836. RETURN_NULL();
  837. break;
  838. }
  839. if (!node) {
  840. RETURN_NULL();
  841. }
  842. tidy_instanciate(tidy_ce_node, return_value);
  843. newobj = Z_TIDY_P(return_value);
  844. newobj->type = is_node;
  845. newobj->ptdoc = obj->ptdoc;
  846. newobj->node = node;
  847. newobj->ptdoc->ref_count++;
  848. tidy_add_default_properties(newobj, is_node);
  849. }
  850. static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options)
  851. {
  852. zval *opt_val;
  853. zend_string *opt_name;
  854. ZEND_HASH_FOREACH_STR_KEY_VAL(ht_options, opt_name, opt_val) {
  855. if (opt_name == NULL) {
  856. continue;
  857. }
  858. _php_tidy_set_tidy_opt(doc, ZSTR_VAL(opt_name), opt_val);
  859. } ZEND_HASH_FOREACH_END();
  860. return SUCCESS;
  861. }
  862. static int php_tidy_parse_string(PHPTidyObj *obj, char *string, uint32_t len, char *enc)
  863. {
  864. TidyBuffer buf;
  865. if(enc) {
  866. if (tidySetCharEncoding(obj->ptdoc->doc, enc) < 0) {
  867. php_error_docref(NULL, E_WARNING, "Could not set encoding '%s'", enc);
  868. return FAILURE;
  869. }
  870. }
  871. obj->ptdoc->initialized = 1;
  872. tidyBufInit(&buf);
  873. tidyBufAttach(&buf, (byte *) string, len);
  874. if (tidyParseBuffer(obj->ptdoc->doc, &buf) < 0) {
  875. php_error_docref(NULL, E_WARNING, "%s", obj->ptdoc->errbuf->bp);
  876. return FAILURE;
  877. }
  878. tidy_doc_update_properties(obj);
  879. return SUCCESS;
  880. }
  881. static PHP_MINIT_FUNCTION(tidy)
  882. {
  883. tidySetMallocCall(php_tidy_malloc);
  884. tidySetReallocCall(php_tidy_realloc);
  885. tidySetFreeCall(php_tidy_free);
  886. tidySetPanicCall(php_tidy_panic);
  887. REGISTER_INI_ENTRIES();
  888. REGISTER_TIDY_CLASS(tidy, doc, NULL, 0);
  889. REGISTER_TIDY_CLASS(tidyNode, node, NULL, ZEND_ACC_FINAL);
  890. tidy_object_handlers_doc.cast_object = tidy_doc_cast_handler;
  891. tidy_object_handlers_node.cast_object = tidy_node_cast_handler;
  892. tidy_object_handlers_node.offset = tidy_object_handlers_doc.offset = XtOffsetOf(PHPTidyObj, std);
  893. tidy_object_handlers_node.free_obj = tidy_object_handlers_doc.free_obj = tidy_object_free_storage;
  894. _php_tidy_register_tags(INIT_FUNC_ARGS_PASSTHRU);
  895. _php_tidy_register_nodetypes(INIT_FUNC_ARGS_PASSTHRU);
  896. php_output_handler_alias_register(ZEND_STRL("ob_tidyhandler"), php_tidy_output_handler_init);
  897. return SUCCESS;
  898. }
  899. static PHP_RINIT_FUNCTION(tidy)
  900. {
  901. #if defined(COMPILE_DL_TIDY) && defined(ZTS)
  902. ZEND_TSRMLS_CACHE_UPDATE();
  903. #endif
  904. php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
  905. return SUCCESS;
  906. }
  907. static PHP_MSHUTDOWN_FUNCTION(tidy)
  908. {
  909. UNREGISTER_INI_ENTRIES();
  910. return SUCCESS;
  911. }
  /*
   * phpinfo() section: reports that tidy is enabled, the library version
   * (libTidy or libtidyp, depending on which header was found), the release
   * date when available, and the extension's INI entries.
   */
  static PHP_MINFO_FUNCTION(tidy)
  {
      php_info_print_table_start();

      php_info_print_table_row(2, "Tidy support", "enabled");

  #if HAVE_TIDYBUFFIO_H
      php_info_print_table_row(2, "libTidy Version", (char *)tidyLibraryVersion());
  #elif HAVE_TIDYP_H
      php_info_print_table_row(2, "libtidyp Version", (char *)tidyVersion());
  #endif

  #if HAVE_TIDYRELEASEDATE
      php_info_print_table_row(2, "libTidy Release", (char *)tidyReleaseDate());
  #endif

      php_info_print_table_end();

      DISPLAY_INI_ENTRIES();
  }
  927. static PHP_INI_MH(php_tidy_set_clean_output)
  928. {
  929. int status;
  930. zend_bool value;
  931. if (ZSTR_LEN(new_value)==2 && strcasecmp("on", ZSTR_VAL(new_value))==0) {
  932. value = (zend_bool) 1;
  933. } else if (ZSTR_LEN(new_value)==3 && strcasecmp("yes", ZSTR_VAL(new_value))==0) {
  934. value = (zend_bool) 1;
  935. } else if (ZSTR_LEN(new_value)==4 && strcasecmp("true", ZSTR_VAL(new_value))==0) {
  936. value = (zend_bool) 1;
  937. } else {
  938. value = (zend_bool) atoi(ZSTR_VAL(new_value));
  939. }
  940. if (stage == PHP_INI_STAGE_RUNTIME) {
  941. status = php_output_get_status();
  942. if (value && (status & PHP_OUTPUT_WRITTEN)) {
  943. php_error_docref(NULL, E_WARNING, "Cannot enable tidy.clean_output - there has already been output");
  944. return FAILURE;
  945. }
  946. if (status & PHP_OUTPUT_SENT) {
  947. php_error_docref(NULL, E_WARNING, "Cannot change tidy.clean_output - headers already sent");
  948. return FAILURE;
  949. }
  950. }
  951. status = OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
  952. if (stage == PHP_INI_STAGE_RUNTIME && value) {
  953. if (!php_output_handler_started(ZEND_STRL("ob_tidyhandler"))) {
  954. php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
  955. }
  956. }
  957. return status;
  958. }
  959. /*
  960. * NOTE: tidy does not support iterative/cumulative parsing, so chunk-sized output handler is not possible
  961. */
  962. static void php_tidy_clean_output_start(const char *name, size_t name_len)
  963. {
  964. php_output_handler *h;
  965. if (TG(clean_output) && (h = php_tidy_output_handler_init(name, name_len, 0, PHP_OUTPUT_HANDLER_STDFLAGS))) {
  966. php_output_handler_start(h);
  967. }
  968. }
  969. static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags)
  970. {
  971. if (chunk_size) {
  972. php_error_docref(NULL, E_WARNING, "Cannot use a chunk size for ob_tidyhandler");
  973. return NULL;
  974. }
  975. if (!TG(clean_output)) {
  976. TG(clean_output) = 1;
  977. }
  978. return php_output_handler_create_internal(handler_name, handler_name_len, php_tidy_output_handler, chunk_size, flags);
  979. }
  980. static int php_tidy_output_handler(void **nothing, php_output_context *output_context)
  981. {
  982. int status = FAILURE;
  983. TidyDoc doc;
  984. TidyBuffer inbuf, outbuf, errbuf;
  985. if (TG(clean_output) && (output_context->op & PHP_OUTPUT_HANDLER_START) && (output_context->op & PHP_OUTPUT_HANDLER_FINAL)) {
  986. doc = tidyCreate();
  987. tidyBufInit(&errbuf);
  988. if (0 == tidySetErrorBuffer(doc, &errbuf)) {
  989. tidyOptSetBool(doc, TidyForceOutput, yes);
  990. tidyOptSetBool(doc, TidyMark, no);
  991. if (ZEND_SIZE_T_UINT_OVFL(output_context->in.used)) {
  992. php_error_docref(NULL, E_WARNING, "Input string is too long");
  993. return status;
  994. }
  995. TIDY_SET_DEFAULT_CONFIG(doc);
  996. tidyBufInit(&inbuf);
  997. tidyBufAttach(&inbuf, (byte *) output_context->in.data, (uint32_t)output_context->in.used);
  998. if (0 <= tidyParseBuffer(doc, &inbuf) && 0 <= tidyCleanAndRepair(doc)) {
  999. tidyBufInit(&outbuf);
  1000. tidySaveBuffer(doc, &outbuf);
  1001. FIX_BUFFER(&outbuf);
  1002. output_context->out.data = (char *) outbuf.bp;
  1003. output_context->out.used = outbuf.size ? outbuf.size-1 : 0;
  1004. output_context->out.free = 1;
  1005. status = SUCCESS;
  1006. }
  1007. }
  1008. tidyRelease(doc);
  1009. tidyBufFree(&errbuf);
  1010. }
  1011. return status;
  1012. }
  1013. /* {{{ proto bool tidy_parse_string(string input [, mixed config_options [, string encoding]])
  1014. Parse a document stored in a string */
  1015. static PHP_FUNCTION(tidy_parse_string)
  1016. {
  1017. char *enc = NULL;
  1018. size_t enc_len = 0;
  1019. zend_string *input;
  1020. zval *options = NULL;
  1021. PHPTidyObj *obj;
  1022. if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|zs", &input, &options, &enc, &enc_len) == FAILURE) {
  1023. RETURN_FALSE;
  1024. }
  1025. if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
  1026. php_error_docref(NULL, E_WARNING, "Input string is too long");
  1027. RETURN_FALSE;
  1028. }
  1029. tidy_instanciate(tidy_ce_doc, return_value);
  1030. obj = Z_TIDY_P(return_value);
  1031. TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
  1032. if (php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == FAILURE) {
  1033. zval_ptr_dtor(return_value);
  1034. RETURN_FALSE;
  1035. }
  1036. }
  1037. /* }}} */
  1038. /* {{{ proto string tidy_get_error_buffer()
  1039. Return warnings and errors which occurred parsing the specified document*/
  1040. static PHP_FUNCTION(tidy_get_error_buffer)
  1041. {
  1042. TIDY_FETCH_OBJECT;
  1043. if (obj->ptdoc->errbuf && obj->ptdoc->errbuf->bp) {
  1044. RETURN_STRINGL((char*)obj->ptdoc->errbuf->bp, obj->ptdoc->errbuf->size-1);
  1045. } else {
  1046. RETURN_FALSE;
  1047. }
  1048. }
  1049. /* }}} */
  1050. /* {{{ proto string tidy_get_output(tidy tidy)
  1051. Return a string representing the parsed tidy markup */
  1052. static PHP_FUNCTION(tidy_get_output)
  1053. {
  1054. TidyBuffer output;
  1055. TIDY_FETCH_OBJECT;
  1056. tidyBufInit(&output);
  1057. tidySaveBuffer(obj->ptdoc->doc, &output);
  1058. FIX_BUFFER(&output);
  1059. RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
  1060. tidyBufFree(&output);
  1061. }
  1062. /* }}} */
  1063. /* {{{ proto bool tidy_parse_file(string file [, mixed config_options [, string encoding [, bool use_include_path]]])
  1064. Parse markup in file or URI */
  1065. static PHP_FUNCTION(tidy_parse_file)
  1066. {
  1067. char *enc = NULL;
  1068. size_t enc_len = 0;
  1069. zend_bool use_include_path = 0;
  1070. zend_string *inputfile, *contents;
  1071. zval *options = NULL;
  1072. PHPTidyObj *obj;
  1073. if (zend_parse_parameters(ZEND_NUM_ARGS(), "P|zsb", &inputfile,
  1074. &options, &enc, &enc_len, &use_include_path) == FAILURE) {
  1075. RETURN_FALSE;
  1076. }
  1077. tidy_instanciate(tidy_ce_doc, return_value);
  1078. obj = Z_TIDY_P(return_value);
  1079. if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
  1080. php_error_docref(NULL, E_WARNING, "Cannot Load '%s' into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (Using include path)" : "");
  1081. RETURN_FALSE;
  1082. }
  1083. if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
  1084. php_error_docref(NULL, E_WARNING, "Input string is too long");
  1085. RETURN_FALSE;
  1086. }
  1087. TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
  1088. if (php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
  1089. zval_ptr_dtor(return_value);
  1090. RETVAL_FALSE;
  1091. }
  1092. zend_string_release_ex(contents, 0);
  1093. }
  1094. /* }}} */
  1095. /* {{{ proto bool tidy_clean_repair(tidy tidy)
  1096. Execute configured cleanup and repair operations on parsed markup */
  1097. static PHP_FUNCTION(tidy_clean_repair)
  1098. {
  1099. TIDY_FETCH_OBJECT;
  1100. if (tidyCleanAndRepair(obj->ptdoc->doc) >= 0) {
  1101. tidy_doc_update_properties(obj);
  1102. RETURN_TRUE;
  1103. }
  1104. RETURN_FALSE;
  1105. }
  1106. /* }}} */
  1107. /* {{{ proto bool tidy_repair_string(string data [, mixed config_file [, string encoding]])
  1108. Repair a string using an optionally provided configuration file */
  1109. static PHP_FUNCTION(tidy_repair_string)
  1110. {
  1111. php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, FALSE);
  1112. }
  1113. /* }}} */
  1114. /* {{{ proto bool tidy_repair_file(string filename [, mixed config_file [, string encoding [, bool use_include_path]]])
  1115. Repair a file using an optionally provided configuration file */
  1116. static PHP_FUNCTION(tidy_repair_file)
  1117. {
  1118. php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, TRUE);
  1119. }
  1120. /* }}} */
  1121. /* {{{ proto bool tidy_diagnose()
  1122. Run configured diagnostics on parsed and repaired markup. */
  1123. static PHP_FUNCTION(tidy_diagnose)
  1124. {
  1125. TIDY_FETCH_OBJECT;
  1126. if (obj->ptdoc->initialized && tidyRunDiagnostics(obj->ptdoc->doc) >= 0) {
  1127. tidy_doc_update_properties(obj);
  1128. RETURN_TRUE;
  1129. }
  1130. RETURN_FALSE;
  1131. }
  1132. /* }}} */
  1133. /* {{{ proto string tidy_get_release()
  1134. Get release date (version) for Tidy library */
  1135. static PHP_FUNCTION(tidy_get_release)
  1136. {
  1137. if (zend_parse_parameters_none() == FAILURE) {
  1138. return;
  1139. }
  1140. #if HAVE_TIDYRELEASEDATE
  1141. RETURN_STRING((char *)tidyReleaseDate());
  1142. #else
  1143. RETURN_STRING((char *)"unknown");
  1144. #endif
  1145. }
  1146. /* }}} */
  #if HAVE_TIDYOPTGETDOC
  /* {{{ proto string tidy_get_opt_doc(tidy resource, string optname)
     Returns the documentation for the given option name, or false when the option is unknown or undocumented */
  static PHP_FUNCTION(tidy_get_opt_doc)
  {
      PHPTidyObj *obj;
      char *optval, *optname;
      size_t optname_len;
      TidyOption opt;
      int parsed;
      TIDY_SET_CONTEXT;

      /* Method call: only the option name is passed; function call: the tidy object comes first. */
      if (object) {
          parsed = zend_parse_parameters(ZEND_NUM_ARGS(), "s", &optname, &optname_len);
      } else {
          parsed = zend_parse_method_parameters(ZEND_NUM_ARGS(), NULL, "Os", &object, tidy_ce_doc, &optname, &optname_len);
      }
      if (parsed == FAILURE) {
          RETURN_FALSE;
      }

      obj = Z_TIDY_P(object);

      opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
      if (!opt) {
          php_error_docref(NULL, E_WARNING, "Unknown Tidy Configuration Option '%s'", optname);
          RETURN_FALSE;
      }

      optval = (char *) tidyOptGetDoc(obj->ptdoc->doc, opt);
      if (!optval) {
          RETURN_FALSE;
      }

      RETURN_STRING(optval);
  }
  /* }}} */
  #endif
  1179. /* {{{ proto array tidy_get_config(tidy tidy)
  1180. Get current Tidy configuration */
  1181. static PHP_FUNCTION(tidy_get_config)
  1182. {
  1183. TidyIterator itOpt;
  1184. char *opt_name;
  1185. void *opt_value;
  1186. TidyOptionType optt;
  1187. TIDY_FETCH_OBJECT;
  1188. itOpt = tidyGetOptionList(obj->ptdoc->doc);
  1189. array_init(return_value);
  1190. while (itOpt) {
  1191. TidyOption opt = tidyGetNextOption(obj->ptdoc->doc, &itOpt);
  1192. opt_name = (char *)tidyOptGetName(opt);
  1193. opt_value = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
  1194. switch (optt) {
  1195. case TidyString:
  1196. add_assoc_str(return_value, opt_name, (zend_string*)opt_value);
  1197. break;
  1198. case TidyInteger:
  1199. add_assoc_long(return_value, opt_name, (zend_long)opt_value);
  1200. break;
  1201. case TidyBoolean:
  1202. add_assoc_bool(return_value, opt_name, opt_value ? 1 : 0);
  1203. break;
  1204. }
  1205. }
  1206. return;
  1207. }
  1208. /* }}} */
  1209. /* {{{ proto int tidy_get_status(tidy tidy)
  1210. Get status of specified document. */
  1211. static PHP_FUNCTION(tidy_get_status)
  1212. {
  1213. TIDY_FETCH_OBJECT;
  1214. RETURN_LONG(tidyStatus(obj->ptdoc->doc));
  1215. }
  1216. /* }}} */
  1217. /* {{{ proto int tidy_get_html_ver(tidy tidy)
  1218. Get the Detected HTML version for the specified document. */
  1219. static PHP_FUNCTION(tidy_get_html_ver)
  1220. {
  1221. TIDY_FETCH_INITIALIZED_OBJECT;
  1222. RETURN_LONG(tidyDetectedHtmlVersion(obj->ptdoc->doc));
  1223. }
  1224. /* }}} */
  1225. /* {{{ proto bool tidy_is_xhtml(tidy tidy)
  1226. Indicates if the document is a XHTML document. */
  1227. static PHP_FUNCTION(tidy_is_xhtml)
  1228. {
  1229. TIDY_FETCH_INITIALIZED_OBJECT;
  1230. RETURN_BOOL(tidyDetectedXhtml(obj->ptdoc->doc));
  1231. }
  1232. /* }}} */
  1233. /* {{{ proto bool tidy_is_xml(tidy tidy)
  1234. Indicates if the document is a generic (non HTML/XHTML) XML document. */
  1235. static PHP_FUNCTION(tidy_is_xml)
  1236. {
  1237. TIDY_FETCH_INITIALIZED_OBJECT;
  1238. RETURN_BOOL(tidyDetectedGenericXml(obj->ptdoc->doc));
  1239. }
  1240. /* }}} */
  1241. /* {{{ proto int tidy_error_count(tidy tidy)
  1242. Returns the Number of Tidy errors encountered for specified document. */
  1243. static PHP_FUNCTION(tidy_error_count)
  1244. {
  1245. TIDY_FETCH_OBJECT;
  1246. RETURN_LONG(tidyErrorCount(obj->ptdoc->doc));
  1247. }
  1248. /* }}} */
  1249. /* {{{ proto int tidy_warning_count(tidy tidy)
  1250. Returns the Number of Tidy warnings encountered for specified document. */
  1251. static PHP_FUNCTION(tidy_warning_count)
  1252. {
  1253. TIDY_FETCH_OBJECT;
  1254. RETURN_LONG(tidyWarningCount(obj->ptdoc->doc));
  1255. }
  1256. /* }}} */
  1257. /* {{{ proto int tidy_access_count(tidy tidy)
  1258. Returns the Number of Tidy accessibility warnings encountered for specified document. */
  1259. static PHP_FUNCTION(tidy_access_count)
  1260. {
  1261. TIDY_FETCH_OBJECT;
  1262. RETURN_LONG(tidyAccessWarningCount(obj->ptdoc->doc));
  1263. }
  1264. /* }}} */
  1265. /* {{{ proto int tidy_config_count(tidy tidy)
  1266. Returns the Number of Tidy configuration errors encountered for specified document. */
  1267. static PHP_FUNCTION(tidy_config_count)
  1268. {
  1269. TIDY_FETCH_OBJECT;
  1270. RETURN_LONG(tidyConfigErrorCount(obj->ptdoc->doc));
  1271. }
  1272. /* }}} */
  1273. /* {{{ proto mixed tidy_getopt(string option)
  1274. Returns the value of the specified configuration option for the tidy document. */
  1275. static PHP_FUNCTION(tidy_getopt)
  1276. {
  1277. PHPTidyObj *obj;
  1278. char *optname;
  1279. void *optval;
  1280. size_t optname_len;
  1281. TidyOption opt;
  1282. TidyOptionType optt;
  1283. TIDY_SET_CONTEXT;
  1284. if (object) {
  1285. if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &optname, &optname_len) == FAILURE) {
  1286. RETURN_FALSE;
  1287. }
  1288. } else {
  1289. if (zend_parse_method_parameters(ZEND_NUM_ARGS(), NULL, "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
  1290. RETURN_FALSE;
  1291. }
  1292. }
  1293. obj = Z_TIDY_P(object);
  1294. opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
  1295. if (!opt) {
  1296. php_error_docref(NULL, E_WARNING, "Unknown Tidy Configuration Option '%s'", optname);
  1297. RETURN_FALSE;
  1298. }
  1299. optval = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
  1300. switch (optt) {
  1301. case TidyString:
  1302. RETVAL_STR((zend_string*)optval);
  1303. return;
  1304. case TidyInteger:
  1305. RETURN_LONG((zend_long)optval);
  1306. break;
  1307. case TidyBoolean:
  1308. if (optval) {
  1309. RETURN_TRUE;
  1310. } else {
  1311. RETURN_FALSE;
  1312. }
  1313. break;
  1314. default:
  1315. php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
  1316. break;
  1317. }
  1318. RETURN_FALSE;
  1319. }
  1320. /* }}} */
  1321. static TIDY_DOC_METHOD(__construct)
  1322. {
  1323. char *enc = NULL;
  1324. size_t enc_len = 0;
  1325. zend_bool use_include_path = 0;
  1326. zval *options = NULL;
  1327. zend_string *contents, *inputfile = NULL;
  1328. PHPTidyObj *obj;
  1329. TIDY_SET_CONTEXT;
  1330. if (zend_parse_parameters(ZEND_NUM_ARGS(), "|Pzsb", &inputfile,
  1331. &options, &enc, &enc_len, &use_include_path) == FAILURE) {
  1332. RETURN_FALSE;
  1333. }
  1334. obj = Z_TIDY_P(object);
  1335. if (inputfile) {
  1336. if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
  1337. php_error_docref(NULL, E_WARNING, "Cannot Load '%s' into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (Using include path)" : "");
  1338. return;
  1339. }
  1340. if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
  1341. php_error_docref(NULL, E_WARNING, "Input string is too long");
  1342. RETURN_FALSE;
  1343. }
  1344. TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
  1345. php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc);
  1346. zend_string_release_ex(contents, 0);
  1347. }
  1348. }
  1349. static TIDY_DOC_METHOD(parseFile)
  1350. {
  1351. char *enc = NULL;
  1352. size_t enc_len = 0;
  1353. zend_bool use_include_path = 0;
  1354. zval *options = NULL;
  1355. zend_string *inputfile, *contents;
  1356. PHPTidyObj *obj;
  1357. TIDY_SET_CONTEXT;
  1358. obj = Z_TIDY_P(object);
  1359. if (zend_parse_parameters(ZEND_NUM_ARGS(), "P|zsb", &inputfile,
  1360. &options, &enc, &enc_len, &use_include_path) == FAILURE) {
  1361. RETURN_FALSE;
  1362. }
  1363. if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
  1364. php_error_docref(NULL, E_WARNING, "Cannot Load '%s' into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (Using include path)" : "");
  1365. RETURN_FALSE;
  1366. }
  1367. if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
  1368. php_error_docref(NULL, E_WARNING, "Input string is too long");
  1369. RETURN_FALSE;
  1370. }
  1371. TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
  1372. if (php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
  1373. RETVAL_FALSE;
  1374. } else {
  1375. RETVAL_TRUE;
  1376. }
  1377. zend_string_release_ex(contents, 0);
  1378. }
  1379. static TIDY_DOC_METHOD(parseString)
  1380. {
  1381. char *enc = NULL;
  1382. size_t enc_len = 0;
  1383. zval *options = NULL;
  1384. PHPTidyObj *obj;
  1385. zend_string *input;
  1386. TIDY_SET_CONTEXT;
  1387. if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|zs", &input, &options, &enc, &enc_len) == FAILURE) {
  1388. RETURN_FALSE;
  1389. }
  1390. if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
  1391. php_error_docref(NULL, E_WARNING, "Input string is too long");
  1392. RETURN_FALSE;
  1393. }
  1394. obj = Z_TIDY_P(object);
  1395. TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
  1396. if(php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == SUCCESS) {
  1397. RETURN_TRUE;
  1398. }
  1399. RETURN_FALSE;
  1400. }
  1401. /* {{{ proto TidyNode tidy_get_root()
  1402. Returns a TidyNode Object representing the root of the tidy parse tree */
  1403. static PHP_FUNCTION(tidy_get_root)
  1404. {
  1405. php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_root_node);
  1406. }
  1407. /* }}} */
  1408. /* {{{ proto TidyNode tidy_get_html()
  1409. Returns a TidyNode Object starting from the <HTML> tag of the tidy parse tree */
  1410. static PHP_FUNCTION(tidy_get_html)
  1411. {
  1412. php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_html_node);
  1413. }
  1414. /* }}} */
  1415. /* {{{ proto TidyNode tidy_get_head()
  1416. Returns a TidyNode Object starting from the <HEAD> tag of the tidy parse tree */
  1417. static PHP_FUNCTION(tidy_get_head)
  1418. {
  1419. php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_head_node);
  1420. }
  1421. /* }}} */
  1422. /* {{{ proto TidyNode tidy_get_body(tidy tidy)
  1423. Returns a TidyNode Object starting from the <BODY> tag of the tidy parse tree */
  1424. static PHP_FUNCTION(tidy_get_body)
  1425. {
  1426. php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_body_node);
  1427. }
  1428. /* }}} */
  1429. /* {{{ proto bool tidyNode::hasChildren()
  1430. Returns true if this node has children */
  1431. static TIDY_NODE_METHOD(hasChildren)
  1432. {
  1433. TIDY_FETCH_ONLY_OBJECT;
  1434. if (tidyGetChild(obj->node)) {
  1435. RETURN_TRUE;
  1436. } else {
  1437. RETURN_FALSE;
  1438. }
  1439. }
  1440. /* }}} */
  1441. /* {{{ proto bool tidyNode::hasSiblings()
  1442. Returns true if this node has siblings */
  1443. static TIDY_NODE_METHOD(hasSiblings)
  1444. {
  1445. TIDY_FETCH_ONLY_OBJECT;
  1446. if (obj->node && tidyGetNext(obj->node)) {
  1447. RETURN_TRUE;
  1448. } else {
  1449. RETURN_FALSE;
  1450. }
  1451. }
  1452. /* }}} */
  1453. /* {{{ proto bool tidyNode::isComment()
  1454. Returns true if this node represents a comment */
  1455. static TIDY_NODE_METHOD(isComment)
  1456. {
  1457. TIDY_FETCH_ONLY_OBJECT;
  1458. if (tidyNodeGetType(obj->node) == TidyNode_Comment) {
  1459. RETURN_TRUE;
  1460. } else {
  1461. RETURN_FALSE;
  1462. }
  1463. }
  1464. /* }}} */
  1465. /* {{{ proto bool tidyNode::isHtml()
  1466. Returns true if this node is part of a HTML document */
  1467. static TIDY_NODE_METHOD(isHtml)
  1468. {
  1469. TIDY_FETCH_ONLY_OBJECT;
  1470. switch (tidyNodeGetType(obj->node)) {
  1471. case TidyNode_Start:
  1472. case TidyNode_End:
  1473. case TidyNode_StartEnd:
  1474. RETURN_TRUE;
  1475. default:
  1476. RETURN_FALSE;
  1477. }
  1478. }
  1479. /* }}} */
  1480. /* {{{ proto bool tidyNode::isText()
  1481. Returns true if this node represents text (no markup) */
  1482. static TIDY_NODE_METHOD(isText)
  1483. {
  1484. TIDY_FETCH_ONLY_OBJECT;
  1485. if (tidyNodeGetType(obj->node) == TidyNode_Text) {
  1486. RETURN_TRUE;
  1487. } else {
  1488. RETURN_FALSE;
  1489. }
  1490. }
  1491. /* }}} */
  1492. /* {{{ proto bool tidyNode::isJste()
  1493. Returns true if this node is JSTE */
  1494. static TIDY_NODE_METHOD(isJste)
  1495. {
  1496. TIDY_FETCH_ONLY_OBJECT;
  1497. if (tidyNodeGetType(obj->node) == TidyNode_Jste) {
  1498. RETURN_TRUE;
  1499. } else {
  1500. RETURN_FALSE;
  1501. }
  1502. }
  1503. /* }}} */
  1504. /* {{{ proto bool tidyNode::isAsp()
  1505. Returns true if this node is ASP */
  1506. static TIDY_NODE_METHOD(isAsp)
  1507. {
  1508. TIDY_FETCH_ONLY_OBJECT;
  1509. if (tidyNodeGetType(obj->node) == TidyNode_Asp) {
  1510. RETURN_TRUE;
  1511. } else {
  1512. RETURN_FALSE;
  1513. }
  1514. }
  1515. /* }}} */
  1516. /* {{{ proto bool tidyNode::isPhp()
  1517. Returns true if this node is PHP */
  1518. static TIDY_NODE_METHOD(isPhp)
  1519. {
  1520. TIDY_FETCH_ONLY_OBJECT;
  1521. if (tidyNodeGetType(obj->node) == TidyNode_Php) {
  1522. RETURN_TRUE;
  1523. } else {
  1524. RETURN_FALSE;
  1525. }
  1526. }
  1527. /* }}} */
  1528. /* {{{ proto tidyNode tidyNode::getParent()
  1529. Returns the parent node if available or NULL */
  1530. static TIDY_NODE_METHOD(getParent)
  1531. {
  1532. TidyNode parent_node;
  1533. PHPTidyObj *newobj;
  1534. TIDY_FETCH_ONLY_OBJECT;
  1535. parent_node = tidyGetParent(obj->node);
  1536. if(parent_node) {
  1537. tidy_instanciate(tidy_ce_node, return_value);
  1538. newobj = Z_TIDY_P(return_value);
  1539. newobj->node = parent_node;
  1540. newobj->type = is_node;
  1541. newobj->ptdoc = obj->ptdoc;
  1542. newobj->ptdoc->ref_count++;
  1543. tidy_add_default_properties(newobj, is_node);
  1544. } else {
  1545. ZVAL_NULL(return_value);
  1546. }
  1547. }
  1548. /* }}} */
  1549. /* {{{ proto tidyNode::__construct()
  1550. __constructor for tidyNode. */
  1551. static TIDY_NODE_METHOD(__construct)
  1552. {
  1553. zend_throw_error(NULL, "You should not create a tidyNode manually");
  1554. }
  1555. /* }}} */
  1556. static void _php_tidy_register_nodetypes(INIT_FUNC_ARGS)
  1557. {
  1558. TIDY_NODE_CONST(ROOT, Root);
  1559. TIDY_NODE_CONST(DOCTYPE, DocType);
  1560. TIDY_NODE_CONST(COMMENT, Comment);
  1561. TIDY_NODE_CONST(PROCINS, ProcIns);
  1562. TIDY_NODE_CONST(TEXT, Text);
  1563. TIDY_NODE_CONST(START, Start);
  1564. TIDY_NODE_CONST(END, End);
  1565. TIDY_NODE_CONST(STARTEND, StartEnd);
  1566. TIDY_NODE_CONST(CDATA, CDATA);
  1567. TIDY_NODE_CONST(SECTION, Section);
  1568. TIDY_NODE_CONST(ASP, Asp);
  1569. TIDY_NODE_CONST(JSTE, Jste);
  1570. TIDY_NODE_CONST(PHP, Php);
  1571. TIDY_NODE_CONST(XMLDECL, XmlDecl);
  1572. }
  1573. static void _php_tidy_register_tags(INIT_FUNC_ARGS)
  1574. {
  1575. TIDY_TAG_CONST(UNKNOWN);
  1576. TIDY_TAG_CONST(A);
  1577. TIDY_TAG_CONST(ABBR);
  1578. TIDY_TAG_CONST(ACRONYM);
  1579. TIDY_TAG_CONST(ADDRESS);
  1580. TIDY_TAG_CONST(ALIGN);
  1581. TIDY_TAG_CONST(APPLET);
  1582. TIDY_TAG_CONST(AREA);
  1583. TIDY_TAG_CONST(B);
  1584. TIDY_TAG_CONST(BASE);
  1585. TIDY_TAG_CONST(BASEFONT);
  1586. TIDY_TAG_CONST(BDO);
  1587. TIDY_TAG_CONST(BGSOUND);
  1588. TIDY_TAG_CONST(BIG);
  1589. TIDY_TAG_CONST(BLINK);
  1590. TIDY_TAG_CONST(BLOCKQUOTE);
  1591. TIDY_TAG_CONST(BODY);
  1592. TIDY_TAG_CONST(BR);
  1593. TIDY_TAG_CONST(BUTTON);
  1594. TIDY_TAG_CONST(CAPTION);
  1595. TIDY_TAG_CONST(CENTER);
  1596. TIDY_TAG_CONST(CITE);
  1597. TIDY_TAG_CONST(CODE);
  1598. TIDY_TAG_CONST(COL);
  1599. TIDY_TAG_CONST(COLGROUP);
  1600. TIDY_TAG_CONST(COMMENT);
  1601. TIDY_TAG_CONST(DD);
  1602. TIDY_TAG_CONST(DEL);
  1603. TIDY_TAG_CONST(DFN);
  1604. TIDY_TAG_CONST(DIR);
  1605. TIDY_TAG_CONST(DIV);
  1606. TIDY_TAG_CONST(DL);
  1607. TIDY_TAG_CONST(DT);
  1608. TIDY_TAG_CONST(EM);
  1609. TIDY_TAG_CONST(EMBED);
  1610. TIDY_TAG_CONST(FIELDSET);
  1611. TIDY_TAG_CONST(FONT);
  1612. TIDY_TAG_CONST(FORM);
  1613. TIDY_TAG_CONST(FRAME);
  1614. TIDY_TAG_CONST(FRAMESET);
  1615. TIDY_TAG_CONST(H1);
  1616. TIDY_TAG_CONST(H2);
  1617. TIDY_TAG_CONST(H3);
  1618. TIDY_TAG_CONST(H4);
  1619. TIDY_TAG_CONST(H5);
  1620. TIDY_TAG_CONST(H6);
  1621. TIDY_TAG_CONST(HEAD);
  1622. TIDY_TAG_CONST(HR);
  1623. TIDY_TAG_CONST(HTML);
  1624. TIDY_TAG_CONST(I);
  1625. TIDY_TAG_CONST(IFRAME);
  1626. TIDY_TAG_CONST(ILAYER);
  1627. TIDY_TAG_CONST(IMG);
  1628. TIDY_TAG_CONST(INPUT);
  1629. TIDY_TAG_CONST(INS);
  1630. TIDY_TAG_CONST(ISINDEX);
  1631. TIDY_TAG_CONST(KBD);
  1632. TIDY_TAG_CONST(KEYGEN);
  1633. TIDY_TAG_CONST(LABEL);
  1634. TIDY_TAG_CONST(LAYER);
  1635. TIDY_TAG_CONST(LEGEND);
  1636. TIDY_TAG_CONST(LI);
  1637. TIDY_TAG_CONST(LINK);
  1638. TIDY_TAG_CONST(LISTING);
  1639. TIDY_TAG_CONST(MAP);
  1640. TIDY_TAG_CONST(MARQUEE);
  1641. TIDY_TAG_CONST(MENU);
  1642. TIDY_TAG_CONST(META);
  1643. TIDY_TAG_CONST(MULTICOL);
  1644. TIDY_TAG_CONST(NOBR);
  1645. TIDY_TAG_CONST(NOEMBED);
  1646. TIDY_TAG_CONST(NOFRAMES);
  1647. TIDY_TAG_CONST(NOLAYER);
  1648. TIDY_TAG_CONST(NOSAVE);
  1649. TIDY_TAG_CONST(NOSCRIPT);
  1650. TIDY_TAG_CONST(OBJECT);
  1651. TIDY_TAG_CONST(OL);
  1652. TIDY_TAG_CONST(OPTGROUP);
  1653. TIDY_TAG_CONST(OPTION);
  1654. TIDY_TAG_CONST(P);
  1655. TIDY_TAG_CONST(PARAM);
  1656. TIDY_TAG_CONST(PLAINTEXT);
  1657. TIDY_TAG_CONST(PRE);
  1658. TIDY_TAG_CONST(Q);
  1659. TIDY_TAG_CONST(RB);
  1660. TIDY_TAG_CONST(RBC);
  1661. TIDY_TAG_CONST(RP);
  1662. TIDY_TAG_CONST(RT);
  1663. TIDY_TAG_CONST(RTC);
  1664. TIDY_TAG_CONST(RUBY);
  1665. TIDY_TAG_CONST(S);
  1666. TIDY_TAG_CONST(SAMP);
  1667. TIDY_TAG_CONST(SCRIPT);
  1668. TIDY_TAG_CONST(SELECT);
  1669. TIDY_TAG_CONST(SERVER);
  1670. TIDY_TAG_CONST(SERVLET);
  1671. TIDY_TAG_CONST(SMALL);
  1672. TIDY_TAG_CONST(SPACER);
  1673. TIDY_TAG_CONST(SPAN);
  1674. TIDY_TAG_CONST(STRIKE);
  1675. TIDY_TAG_CONST(STRONG);
  1676. TIDY_TAG_CONST(STYLE);
  1677. TIDY_TAG_CONST(SUB);
  1678. TIDY_TAG_CONST(SUP);
  1679. TIDY_TAG_CONST(TABLE);
  1680. TIDY_TAG_CONST(TBODY);
  1681. TIDY_TAG_CONST(TD);
  1682. TIDY_TAG_CONST(TEXTAREA);
  1683. TIDY_TAG_CONST(TFOOT);
  1684. TIDY_TAG_CONST(TH);
  1685. TIDY_TAG_CONST(THEAD);
  1686. TIDY_TAG_CONST(TITLE);
  1687. TIDY_TAG_CONST(TR);
  1688. TIDY_TAG_CONST(TT);
  1689. TIDY_TAG_CONST(U);
  1690. TIDY_TAG_CONST(UL);
  1691. TIDY_TAG_CONST(VAR);
  1692. TIDY_TAG_CONST(WBR);
  1693. TIDY_TAG_CONST(XMP);
  1694. }
  1695. #endif
  1696. /*
  1697. * Local variables:
  1698. * tab-width: 4
  1699. * c-basic-offset: 4
  1700. * End:
  1701. * vim600: noet sw=4 ts=4 fdm=marker
  1702. * vim<600: noet sw=4 ts=4
  1703. */