xml.c 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500
  1. /*
  2. +----------------------------------------------------------------------+
  3. | Copyright (c) The PHP Group |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | https://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Authors: Stig Sæther Bakken <ssb@php.net> |
  14. | Thies C. Arntzen <thies@thieso.net> |
  15. | Sterling Hughes <sterling@php.net> |
  16. +----------------------------------------------------------------------+
  17. */
  18. #ifdef HAVE_CONFIG_H
  19. #include "config.h"
  20. #endif
  21. #include "php.h"
  22. #include "zend_variables.h"
  23. #include "ext/standard/php_string.h"
  24. #include "ext/standard/info.h"
  25. #include "ext/standard/html.h"
  26. #ifdef HAVE_XML
  27. #include "php_xml.h"
  28. # include "ext/standard/head.h"
  29. #ifdef LIBXML_EXPAT_COMPAT
  30. #include "ext/libxml/php_libxml.h"
  31. #endif
  32. #include "xml_arginfo.h"
  33. /* Short-term TODO list:
  34. * - Implement XML_ExternalEntityParserCreate()
  35. * - XML_SetCommentHandler
  36. * - XML_SetCdataSectionHandler
  37. * - XML_SetParamEntityParsing
  38. */
  39. /* Long-term TODO list:
  40. * - Fix the expat library so you can install your own memory manager
  41. * functions
  42. */
  43. /* Known bugs:
  44. * - Weird things happen with <![CDATA[]]> sections.
  45. */
  46. ZEND_BEGIN_MODULE_GLOBALS(xml)
  47. XML_Char *default_encoding;
  48. ZEND_END_MODULE_GLOBALS(xml)
  49. ZEND_DECLARE_MODULE_GLOBALS(xml)
  50. #define XML(v) ZEND_MODULE_GLOBALS_ACCESSOR(xml, v)
  51. typedef struct {
  52. int case_folding;
  53. XML_Parser parser;
  54. XML_Char *target_encoding;
  55. /* Reference to the object itself, for convenience.
  56. * It is not owned, do not release it. */
  57. zval index;
  58. /* We return a pointer to these zvals in get_gc(), so it's
  59. * important that a) they are adjacent b) object is the first
  60. * and c) the number of zvals is kept up to date. */
  61. #define XML_PARSER_NUM_ZVALS 12
  62. zval object;
  63. zval startElementHandler;
  64. zval endElementHandler;
  65. zval characterDataHandler;
  66. zval processingInstructionHandler;
  67. zval defaultHandler;
  68. zval unparsedEntityDeclHandler;
  69. zval notationDeclHandler;
  70. zval externalEntityRefHandler;
  71. zval unknownEncodingHandler;
  72. zval startNamespaceDeclHandler;
  73. zval endNamespaceDeclHandler;
  74. zend_function *startElementPtr;
  75. zend_function *endElementPtr;
  76. zend_function *characterDataPtr;
  77. zend_function *processingInstructionPtr;
  78. zend_function *defaultPtr;
  79. zend_function *unparsedEntityDeclPtr;
  80. zend_function *notationDeclPtr;
  81. zend_function *externalEntityRefPtr;
  82. zend_function *unknownEncodingPtr;
  83. zend_function *startNamespaceDeclPtr;
  84. zend_function *endNamespaceDeclPtr;
  85. zval data;
  86. zval info;
  87. int level;
  88. int toffset;
  89. int curtag;
  90. zval *ctag;
  91. char **ltags;
  92. int lastwasopen;
  93. int skipwhite;
  94. int isparsing;
  95. XML_Char *baseURI;
  96. zend_object std;
  97. } xml_parser;
  98. typedef struct {
  99. XML_Char *name;
  100. char (*decoding_function)(unsigned short);
  101. unsigned short (*encoding_function)(unsigned char);
  102. } xml_encoding;
  103. enum php_xml_option {
  104. PHP_XML_OPTION_CASE_FOLDING = 1,
  105. PHP_XML_OPTION_TARGET_ENCODING,
  106. PHP_XML_OPTION_SKIP_TAGSTART,
  107. PHP_XML_OPTION_SKIP_WHITE
  108. };
  109. /* {{{ dynamically loadable module stuff */
  110. #ifdef COMPILE_DL_XML
  111. #ifdef ZTS
  112. ZEND_TSRMLS_CACHE_DEFINE()
  113. #endif
  114. ZEND_GET_MODULE(xml)
  115. #endif /* COMPILE_DL_XML */
  116. /* }}} */
  117. #define XML_MAXLEVEL 255 /* XXX this should be dynamic */
  118. #define SKIP_TAGSTART(str) ((str) + (parser->toffset > (int)strlen(str) ? strlen(str) : parser->toffset))
  119. static zend_class_entry *xml_parser_ce;
  120. static zend_object_handlers xml_parser_object_handlers;
  121. /* {{{ function prototypes */
  122. PHP_MINIT_FUNCTION(xml);
  123. PHP_MINFO_FUNCTION(xml);
  124. static PHP_GINIT_FUNCTION(xml);
  125. static zend_object *xml_parser_create_object(zend_class_entry *class_type);
  126. static void xml_parser_free_obj(zend_object *object);
  127. static HashTable *xml_parser_get_gc(zend_object *object, zval **table, int *n);
  128. static zend_function *xml_parser_get_constructor(zend_object *object);
  129. static zend_string *xml_utf8_decode(const XML_Char *, size_t, const XML_Char *);
  130. static void xml_set_handler(zval *, zval *);
  131. inline static unsigned short xml_encode_iso_8859_1(unsigned char);
  132. inline static char xml_decode_iso_8859_1(unsigned short);
  133. inline static unsigned short xml_encode_us_ascii(unsigned char);
  134. inline static char xml_decode_us_ascii(unsigned short);
  135. static void xml_call_handler(xml_parser *, zval *, zend_function *, int, zval *, zval *);
  136. static void _xml_xmlchar_zval(const XML_Char *, int, const XML_Char *, zval *);
  137. static int _xml_xmlcharlen(const XML_Char *);
  138. static void _xml_add_to_info(xml_parser *parser,char *name);
  139. inline static zend_string *_xml_decode_tag(xml_parser *parser, const char *tag);
  140. void _xml_startElementHandler(void *, const XML_Char *, const XML_Char **);
  141. void _xml_endElementHandler(void *, const XML_Char *);
  142. void _xml_characterDataHandler(void *, const XML_Char *, int);
  143. void _xml_processingInstructionHandler(void *, const XML_Char *, const XML_Char *);
  144. void _xml_defaultHandler(void *, const XML_Char *, int);
  145. void _xml_unparsedEntityDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
  146. void _xml_notationDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
  147. int _xml_externalEntityRefHandler(XML_Parser, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
  148. void _xml_startNamespaceDeclHandler(void *, const XML_Char *, const XML_Char *);
  149. void _xml_endNamespaceDeclHandler(void *, const XML_Char *);
  150. /* }}} */
  151. #ifdef LIBXML_EXPAT_COMPAT
  152. static const zend_module_dep xml_deps[] = {
  153. ZEND_MOD_REQUIRED("libxml")
  154. ZEND_MOD_END
  155. };
  156. #endif
  157. zend_module_entry xml_module_entry = {
  158. #ifdef LIBXML_EXPAT_COMPAT
  159. STANDARD_MODULE_HEADER_EX, NULL,
  160. xml_deps,
  161. #else
  162. STANDARD_MODULE_HEADER,
  163. #endif
  164. "xml", /* extension name */
  165. ext_functions, /* extension function list */
  166. PHP_MINIT(xml), /* extension-wide startup function */
  167. NULL, /* extension-wide shutdown function */
  168. NULL, /* per-request startup function */
  169. NULL, /* per-request shutdown function */
  170. PHP_MINFO(xml), /* information function */
  171. PHP_XML_VERSION,
  172. PHP_MODULE_GLOBALS(xml), /* globals descriptor */
  173. PHP_GINIT(xml), /* globals ctor */
  174. NULL, /* globals dtor */
  175. NULL, /* post deactivate */
  176. STANDARD_MODULE_PROPERTIES_EX
  177. };
  178. /* All the encoding functions are set to NULL right now, since all
  179. * the encoding is currently done internally by expat/xmltok.
  180. */
  181. const xml_encoding xml_encodings[] = {
  182. { (XML_Char *)"ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 },
  183. { (XML_Char *)"US-ASCII", xml_decode_us_ascii, xml_encode_us_ascii },
  184. { (XML_Char *)"UTF-8", NULL, NULL },
  185. { (XML_Char *)NULL, NULL, NULL }
  186. };
  187. static XML_Memory_Handling_Suite php_xml_mem_hdlrs;
  188. /* }}} */
  189. /* {{{ startup, shutdown and info functions */
  190. static PHP_GINIT_FUNCTION(xml)
  191. {
  192. #if defined(COMPILE_DL_XML) && defined(ZTS)
  193. ZEND_TSRMLS_CACHE_UPDATE();
  194. #endif
  195. xml_globals->default_encoding = (XML_Char*)"UTF-8";
  196. }
  197. static void *php_xml_malloc_wrapper(size_t sz)
  198. {
  199. return emalloc(sz);
  200. }
  201. static void *php_xml_realloc_wrapper(void *ptr, size_t sz)
  202. {
  203. return erealloc(ptr, sz);
  204. }
  205. static void php_xml_free_wrapper(void *ptr)
  206. {
  207. if (ptr != NULL) {
  208. efree(ptr);
  209. }
  210. }
  211. PHP_MINIT_FUNCTION(xml)
  212. {
  213. xml_parser_ce = register_class_XMLParser();
  214. xml_parser_ce->create_object = xml_parser_create_object;
  215. memcpy(&xml_parser_object_handlers, &std_object_handlers, sizeof(zend_object_handlers));
  216. xml_parser_object_handlers.offset = XtOffsetOf(xml_parser, std);
  217. xml_parser_object_handlers.free_obj = xml_parser_free_obj;
  218. xml_parser_object_handlers.get_gc = xml_parser_get_gc;
  219. xml_parser_object_handlers.get_constructor = xml_parser_get_constructor;
  220. xml_parser_object_handlers.clone_obj = NULL;
  221. xml_parser_object_handlers.compare = zend_objects_not_comparable;
  222. REGISTER_LONG_CONSTANT("XML_ERROR_NONE", XML_ERROR_NONE, CONST_CS|CONST_PERSISTENT);
  223. REGISTER_LONG_CONSTANT("XML_ERROR_NO_MEMORY", XML_ERROR_NO_MEMORY, CONST_CS|CONST_PERSISTENT);
  224. REGISTER_LONG_CONSTANT("XML_ERROR_SYNTAX", XML_ERROR_SYNTAX, CONST_CS|CONST_PERSISTENT);
  225. REGISTER_LONG_CONSTANT("XML_ERROR_NO_ELEMENTS", XML_ERROR_NO_ELEMENTS, CONST_CS|CONST_PERSISTENT);
  226. REGISTER_LONG_CONSTANT("XML_ERROR_INVALID_TOKEN", XML_ERROR_INVALID_TOKEN, CONST_CS|CONST_PERSISTENT);
  227. REGISTER_LONG_CONSTANT("XML_ERROR_UNCLOSED_TOKEN", XML_ERROR_UNCLOSED_TOKEN, CONST_CS|CONST_PERSISTENT);
  228. REGISTER_LONG_CONSTANT("XML_ERROR_PARTIAL_CHAR", XML_ERROR_PARTIAL_CHAR, CONST_CS|CONST_PERSISTENT);
  229. REGISTER_LONG_CONSTANT("XML_ERROR_TAG_MISMATCH", XML_ERROR_TAG_MISMATCH, CONST_CS|CONST_PERSISTENT);
  230. REGISTER_LONG_CONSTANT("XML_ERROR_DUPLICATE_ATTRIBUTE", XML_ERROR_DUPLICATE_ATTRIBUTE, CONST_CS|CONST_PERSISTENT);
  231. REGISTER_LONG_CONSTANT("XML_ERROR_JUNK_AFTER_DOC_ELEMENT", XML_ERROR_JUNK_AFTER_DOC_ELEMENT, CONST_CS|CONST_PERSISTENT);
  232. REGISTER_LONG_CONSTANT("XML_ERROR_PARAM_ENTITY_REF", XML_ERROR_PARAM_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
  233. REGISTER_LONG_CONSTANT("XML_ERROR_UNDEFINED_ENTITY", XML_ERROR_UNDEFINED_ENTITY, CONST_CS|CONST_PERSISTENT);
  234. REGISTER_LONG_CONSTANT("XML_ERROR_RECURSIVE_ENTITY_REF", XML_ERROR_RECURSIVE_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
  235. REGISTER_LONG_CONSTANT("XML_ERROR_ASYNC_ENTITY", XML_ERROR_ASYNC_ENTITY, CONST_CS|CONST_PERSISTENT);
  236. REGISTER_LONG_CONSTANT("XML_ERROR_BAD_CHAR_REF", XML_ERROR_BAD_CHAR_REF, CONST_CS|CONST_PERSISTENT);
  237. REGISTER_LONG_CONSTANT("XML_ERROR_BINARY_ENTITY_REF", XML_ERROR_BINARY_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
  238. REGISTER_LONG_CONSTANT("XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF", XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
  239. REGISTER_LONG_CONSTANT("XML_ERROR_MISPLACED_XML_PI", XML_ERROR_MISPLACED_XML_PI, CONST_CS|CONST_PERSISTENT);
  240. REGISTER_LONG_CONSTANT("XML_ERROR_UNKNOWN_ENCODING", XML_ERROR_UNKNOWN_ENCODING, CONST_CS|CONST_PERSISTENT);
  241. REGISTER_LONG_CONSTANT("XML_ERROR_INCORRECT_ENCODING", XML_ERROR_INCORRECT_ENCODING, CONST_CS|CONST_PERSISTENT);
  242. REGISTER_LONG_CONSTANT("XML_ERROR_UNCLOSED_CDATA_SECTION", XML_ERROR_UNCLOSED_CDATA_SECTION, CONST_CS|CONST_PERSISTENT);
  243. REGISTER_LONG_CONSTANT("XML_ERROR_EXTERNAL_ENTITY_HANDLING", XML_ERROR_EXTERNAL_ENTITY_HANDLING, CONST_CS|CONST_PERSISTENT);
  244. REGISTER_LONG_CONSTANT("XML_OPTION_CASE_FOLDING", PHP_XML_OPTION_CASE_FOLDING, CONST_CS|CONST_PERSISTENT);
  245. REGISTER_LONG_CONSTANT("XML_OPTION_TARGET_ENCODING", PHP_XML_OPTION_TARGET_ENCODING, CONST_CS|CONST_PERSISTENT);
  246. REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_TAGSTART", PHP_XML_OPTION_SKIP_TAGSTART, CONST_CS|CONST_PERSISTENT);
  247. REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_WHITE", PHP_XML_OPTION_SKIP_WHITE, CONST_CS|CONST_PERSISTENT);
  248. /* this object should not be pre-initialised at compile time,
  249. as the order of members may vary */
  250. php_xml_mem_hdlrs.malloc_fcn = php_xml_malloc_wrapper;
  251. php_xml_mem_hdlrs.realloc_fcn = php_xml_realloc_wrapper;
  252. php_xml_mem_hdlrs.free_fcn = php_xml_free_wrapper;
  253. #ifdef LIBXML_EXPAT_COMPAT
  254. REGISTER_STRING_CONSTANT("XML_SAX_IMPL", "libxml", CONST_CS|CONST_PERSISTENT);
  255. #else
  256. REGISTER_STRING_CONSTANT("XML_SAX_IMPL", "expat", CONST_CS|CONST_PERSISTENT);
  257. #endif
  258. return SUCCESS;
  259. }
  260. PHP_MINFO_FUNCTION(xml)
  261. {
  262. php_info_print_table_start();
  263. php_info_print_table_row(2, "XML Support", "active");
  264. php_info_print_table_row(2, "XML Namespace Support", "active");
  265. #if defined(LIBXML_DOTTED_VERSION) && defined(LIBXML_EXPAT_COMPAT)
  266. php_info_print_table_row(2, "libxml2 Version", LIBXML_DOTTED_VERSION);
  267. #else
  268. php_info_print_table_row(2, "EXPAT Version", XML_ExpatVersion());
  269. #endif
  270. php_info_print_table_end();
  271. }
  272. /* }}} */
  273. /* {{{ extension-internal functions */
  274. static void _xml_xmlchar_zval(const XML_Char *s, int len, const XML_Char *encoding, zval *ret)
  275. {
  276. if (s == NULL) {
  277. ZVAL_FALSE(ret);
  278. return;
  279. }
  280. if (len == 0) {
  281. len = _xml_xmlcharlen(s);
  282. }
  283. ZVAL_STR(ret, xml_utf8_decode(s, len, encoding));
  284. }
  285. /* }}} */
  286. static inline xml_parser *xml_parser_from_obj(zend_object *obj) {
  287. return (xml_parser *)((char *)(obj) - XtOffsetOf(xml_parser, std));
  288. }
  289. #define Z_XMLPARSER_P(zv) xml_parser_from_obj(Z_OBJ_P(zv))
  290. static zend_object *xml_parser_create_object(zend_class_entry *class_type) {
  291. xml_parser *intern = zend_object_alloc(sizeof(xml_parser), class_type);
  292. memset(intern, 0, sizeof(xml_parser) - sizeof(zend_object));
  293. zend_object_std_init(&intern->std, class_type);
  294. object_properties_init(&intern->std, class_type);
  295. intern->std.handlers = &xml_parser_object_handlers;
  296. return &intern->std;
  297. }
  298. static void xml_parser_free_obj(zend_object *object)
  299. {
  300. xml_parser *parser = xml_parser_from_obj(object);
  301. if (parser->parser) {
  302. XML_ParserFree(parser->parser);
  303. }
  304. if (parser->ltags) {
  305. int inx;
  306. for (inx = 0; ((inx < parser->level) && (inx < XML_MAXLEVEL)); inx++)
  307. efree(parser->ltags[ inx ]);
  308. efree(parser->ltags);
  309. }
  310. if (!Z_ISUNDEF(parser->startElementHandler)) {
  311. zval_ptr_dtor(&parser->startElementHandler);
  312. }
  313. if (!Z_ISUNDEF(parser->endElementHandler)) {
  314. zval_ptr_dtor(&parser->endElementHandler);
  315. }
  316. if (!Z_ISUNDEF(parser->characterDataHandler)) {
  317. zval_ptr_dtor(&parser->characterDataHandler);
  318. }
  319. if (!Z_ISUNDEF(parser->processingInstructionHandler)) {
  320. zval_ptr_dtor(&parser->processingInstructionHandler);
  321. }
  322. if (!Z_ISUNDEF(parser->defaultHandler)) {
  323. zval_ptr_dtor(&parser->defaultHandler);
  324. }
  325. if (!Z_ISUNDEF(parser->unparsedEntityDeclHandler)) {
  326. zval_ptr_dtor(&parser->unparsedEntityDeclHandler);
  327. }
  328. if (!Z_ISUNDEF(parser->notationDeclHandler)) {
  329. zval_ptr_dtor(&parser->notationDeclHandler);
  330. }
  331. if (!Z_ISUNDEF(parser->externalEntityRefHandler)) {
  332. zval_ptr_dtor(&parser->externalEntityRefHandler);
  333. }
  334. if (!Z_ISUNDEF(parser->unknownEncodingHandler)) {
  335. zval_ptr_dtor(&parser->unknownEncodingHandler);
  336. }
  337. if (!Z_ISUNDEF(parser->startNamespaceDeclHandler)) {
  338. zval_ptr_dtor(&parser->startNamespaceDeclHandler);
  339. }
  340. if (!Z_ISUNDEF(parser->endNamespaceDeclHandler)) {
  341. zval_ptr_dtor(&parser->endNamespaceDeclHandler);
  342. }
  343. if (parser->baseURI) {
  344. efree(parser->baseURI);
  345. }
  346. if (!Z_ISUNDEF(parser->object)) {
  347. zval_ptr_dtor(&parser->object);
  348. }
  349. zend_object_std_dtor(&parser->std);
  350. }
  351. static HashTable *xml_parser_get_gc(zend_object *object, zval **table, int *n)
  352. {
  353. xml_parser *parser = xml_parser_from_obj(object);
  354. *table = &parser->object;
  355. *n = XML_PARSER_NUM_ZVALS;
  356. return zend_std_get_properties(object);
  357. }
  358. static zend_function *xml_parser_get_constructor(zend_object *object) {
  359. zend_throw_error(NULL, "Cannot directly construct XMLParser, use xml_parser_create() or xml_parser_create_ns() instead");
  360. return NULL;
  361. }
  362. /* {{{ xml_set_handler() */
  363. static void xml_set_handler(zval *handler, zval *data)
  364. {
  365. /* If we have already a handler, release it */
  366. if (handler) {
  367. zval_ptr_dtor(handler);
  368. }
  369. /* IS_ARRAY might indicate that we're using array($obj, 'method') syntax */
  370. if (Z_TYPE_P(data) != IS_ARRAY && Z_TYPE_P(data) != IS_OBJECT) {
  371. convert_to_string(data);
  372. if (Z_STRLEN_P(data) == 0) {
  373. ZVAL_UNDEF(handler);
  374. return;
  375. }
  376. }
  377. ZVAL_COPY(handler, data);
  378. }
  379. /* }}} */
  380. /* {{{ xml_call_handler() */
  381. static void xml_call_handler(xml_parser *parser, zval *handler, zend_function *function_ptr, int argc, zval *argv, zval *retval)
  382. {
  383. int i;
  384. ZVAL_UNDEF(retval);
  385. if (parser && handler && !EG(exception)) {
  386. int result;
  387. zend_fcall_info fci;
  388. fci.size = sizeof(fci);
  389. ZVAL_COPY_VALUE(&fci.function_name, handler);
  390. fci.object = Z_OBJ(parser->object);
  391. fci.retval = retval;
  392. fci.param_count = argc;
  393. fci.params = argv;
  394. fci.named_params = NULL;
  395. result = zend_call_function(&fci, NULL);
  396. if (result == FAILURE) {
  397. zval *method;
  398. zval *obj;
  399. if (Z_TYPE_P(handler) == IS_STRING) {
  400. php_error_docref(NULL, E_WARNING, "Unable to call handler %s()", Z_STRVAL_P(handler));
  401. } else if (Z_TYPE_P(handler) == IS_ARRAY &&
  402. (obj = zend_hash_index_find(Z_ARRVAL_P(handler), 0)) != NULL &&
  403. (method = zend_hash_index_find(Z_ARRVAL_P(handler), 1)) != NULL &&
  404. Z_TYPE_P(obj) == IS_OBJECT &&
  405. Z_TYPE_P(method) == IS_STRING) {
  406. php_error_docref(NULL, E_WARNING, "Unable to call handler %s::%s()", ZSTR_VAL(Z_OBJCE_P(obj)->name), Z_STRVAL_P(method));
  407. } else
  408. php_error_docref(NULL, E_WARNING, "Unable to call handler");
  409. }
  410. }
  411. for (i = 0; i < argc; i++) {
  412. zval_ptr_dtor(&argv[i]);
  413. }
  414. }
  415. /* }}} */
  416. /* {{{ xml_encode_iso_8859_1() */
  417. inline static unsigned short xml_encode_iso_8859_1(unsigned char c)
  418. {
  419. return (unsigned short)c;
  420. }
  421. /* }}} */
  422. /* {{{ xml_decode_iso_8859_1() */
  423. inline static char xml_decode_iso_8859_1(unsigned short c)
  424. {
  425. return (char)(c > 0xff ? '?' : c);
  426. }
  427. /* }}} */
  428. /* {{{ xml_encode_us_ascii() */
  429. inline static unsigned short xml_encode_us_ascii(unsigned char c)
  430. {
  431. return (unsigned short)c;
  432. }
  433. /* }}} */
  434. /* {{{ xml_decode_us_ascii() */
  435. inline static char xml_decode_us_ascii(unsigned short c)
  436. {
  437. return (char)(c > 0x7f ? '?' : c);
  438. }
  439. /* }}} */
  440. /* {{{ xml_get_encoding() */
  441. static const xml_encoding *xml_get_encoding(const XML_Char *name)
  442. {
  443. const xml_encoding *enc = &xml_encodings[0];
  444. while (enc && enc->name) {
  445. if (strcasecmp((char *)name, (char *)enc->name) == 0) {
  446. return enc;
  447. }
  448. enc++;
  449. }
  450. return NULL;
  451. }
  452. /* }}} */
  453. /* {{{ xml_utf8_decode() */
  454. static zend_string *xml_utf8_decode(const XML_Char *s, size_t len, const XML_Char *encoding)
  455. {
  456. size_t pos = 0;
  457. unsigned int c;
  458. char (*decoder)(unsigned short) = NULL;
  459. const xml_encoding *enc = xml_get_encoding(encoding);
  460. zend_string *str;
  461. if (enc) {
  462. decoder = enc->decoding_function;
  463. }
  464. if (decoder == NULL) {
  465. /* If the target encoding was unknown, or no decoder function
  466. * was specified, return the UTF-8-encoded data as-is.
  467. */
  468. str = zend_string_init((char *)s, len, 0);
  469. return str;
  470. }
  471. str = zend_string_alloc(len, 0);
  472. ZSTR_LEN(str) = 0;
  473. while (pos < len) {
  474. int status = FAILURE;
  475. c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
  476. if (status == FAILURE || c > 0xFFU) {
  477. c = '?';
  478. }
  479. ZSTR_VAL(str)[ZSTR_LEN(str)++] = decoder ? (unsigned int)decoder(c) : c;
  480. }
  481. ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
  482. if (ZSTR_LEN(str) < len) {
  483. str = zend_string_truncate(str, ZSTR_LEN(str), 0);
  484. }
  485. return str;
  486. }
  487. /* }}} */
  488. /* {{{ _xml_xmlcharlen() */
  489. static int _xml_xmlcharlen(const XML_Char *s)
  490. {
  491. int len = 0;
  492. while (*s) {
  493. len++;
  494. s++;
  495. }
  496. return len;
  497. }
  498. /* }}} */
  499. /* {{{ _xml_add_to_info() */
  500. static void _xml_add_to_info(xml_parser *parser,char *name)
  501. {
  502. zval *element;
  503. if (Z_ISUNDEF(parser->info)) {
  504. return;
  505. }
  506. if ((element = zend_hash_str_find(Z_ARRVAL(parser->info), name, strlen(name))) == NULL) {
  507. zval values;
  508. array_init(&values);
  509. element = zend_hash_str_update(Z_ARRVAL(parser->info), name, strlen(name), &values);
  510. }
  511. add_next_index_long(element, parser->curtag);
  512. parser->curtag++;
  513. }
  514. /* }}} */
  515. /* {{{ _xml_decode_tag() */
  516. static zend_string *_xml_decode_tag(xml_parser *parser, const char *tag)
  517. {
  518. zend_string *str;
  519. str = xml_utf8_decode((const XML_Char *)tag, strlen(tag), parser->target_encoding);
  520. if (parser->case_folding) {
  521. php_strtoupper(ZSTR_VAL(str), ZSTR_LEN(str));
  522. }
  523. return str;
  524. }
  525. /* }}} */
  526. /* {{{ _xml_startElementHandler() */
  527. void _xml_startElementHandler(void *userData, const XML_Char *name, const XML_Char **attributes)
  528. {
  529. xml_parser *parser = (xml_parser *)userData;
  530. const char **attrs = (const char **) attributes;
  531. zend_string *att, *tag_name, *val;
  532. zval retval, args[3];
  533. if (parser) {
  534. parser->level++;
  535. tag_name = _xml_decode_tag(parser, (const char *)name);
  536. if (!Z_ISUNDEF(parser->startElementHandler)) {
  537. ZVAL_COPY(&args[0], &parser->index);
  538. ZVAL_STRING(&args[1], SKIP_TAGSTART(ZSTR_VAL(tag_name)));
  539. array_init(&args[2]);
  540. while (attributes && *attributes) {
  541. zval tmp;
  542. att = _xml_decode_tag(parser, (const char *)attributes[0]);
  543. val = xml_utf8_decode(attributes[1], strlen((char *)attributes[1]), parser->target_encoding);
  544. ZVAL_STR(&tmp, val);
  545. zend_symtable_update(Z_ARRVAL(args[2]), att, &tmp);
  546. attributes += 2;
  547. zend_string_release_ex(att, 0);
  548. }
  549. xml_call_handler(parser, &parser->startElementHandler, parser->startElementPtr, 3, args, &retval);
  550. zval_ptr_dtor(&retval);
  551. }
  552. if (!Z_ISUNDEF(parser->data)) {
  553. if (parser->level <= XML_MAXLEVEL) {
  554. zval tag, atr;
  555. int atcnt = 0;
  556. array_init(&tag);
  557. array_init(&atr);
  558. _xml_add_to_info(parser, ZSTR_VAL(tag_name) + parser->toffset);
  559. add_assoc_string(&tag, "tag", SKIP_TAGSTART(ZSTR_VAL(tag_name))); /* cast to avoid gcc-warning */
  560. add_assoc_string(&tag, "type", "open");
  561. add_assoc_long(&tag, "level", parser->level);
  562. parser->ltags[parser->level-1] = estrdup(ZSTR_VAL(tag_name));
  563. parser->lastwasopen = 1;
  564. attributes = (const XML_Char **) attrs;
  565. while (attributes && *attributes) {
  566. zval tmp;
  567. att = _xml_decode_tag(parser, (const char *)attributes[0]);
  568. val = xml_utf8_decode(attributes[1], strlen((char *)attributes[1]), parser->target_encoding);
  569. ZVAL_STR(&tmp, val);
  570. zend_symtable_update(Z_ARRVAL(atr), att, &tmp);
  571. atcnt++;
  572. attributes += 2;
  573. zend_string_release_ex(att, 0);
  574. }
  575. if (atcnt) {
  576. zend_hash_str_add(Z_ARRVAL(tag), "attributes", sizeof("attributes") - 1, &atr);
  577. } else {
  578. zval_ptr_dtor(&atr);
  579. }
  580. parser->ctag = zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
  581. } else if (parser->level == (XML_MAXLEVEL + 1)) {
  582. php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
  583. }
  584. }
  585. zend_string_release_ex(tag_name, 0);
  586. }
  587. }
  588. /* }}} */
  589. /* {{{ _xml_endElementHandler() */
  590. void _xml_endElementHandler(void *userData, const XML_Char *name)
  591. {
  592. xml_parser *parser = (xml_parser *)userData;
  593. if (parser) {
  594. zval retval, args[2];
  595. zend_string *tag_name = _xml_decode_tag(parser, (const char *)name);
  596. if (!Z_ISUNDEF(parser->endElementHandler)) {
  597. ZVAL_COPY(&args[0], &parser->index);
  598. ZVAL_STRING(&args[1], SKIP_TAGSTART(ZSTR_VAL(tag_name)));
  599. xml_call_handler(parser, &parser->endElementHandler, parser->endElementPtr, 2, args, &retval);
  600. zval_ptr_dtor(&retval);
  601. }
  602. if (!Z_ISUNDEF(parser->data)) {
  603. zval tag;
  604. if (parser->lastwasopen) {
  605. add_assoc_string(parser->ctag, "type", "complete");
  606. } else {
  607. array_init(&tag);
  608. _xml_add_to_info(parser, ZSTR_VAL(tag_name) + parser->toffset);
  609. add_assoc_string(&tag, "tag", SKIP_TAGSTART(ZSTR_VAL(tag_name))); /* cast to avoid gcc-warning */
  610. add_assoc_string(&tag, "type", "close");
  611. add_assoc_long(&tag, "level", parser->level);
  612. zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
  613. }
  614. parser->lastwasopen = 0;
  615. }
  616. zend_string_release_ex(tag_name, 0);
  617. if ((parser->ltags) && (parser->level <= XML_MAXLEVEL)) {
  618. efree(parser->ltags[parser->level-1]);
  619. }
  620. parser->level--;
  621. }
  622. }
  623. /* }}} */
  624. /* {{{ _xml_characterDataHandler() */
  625. void _xml_characterDataHandler(void *userData, const XML_Char *s, int len)
  626. {
  627. xml_parser *parser = (xml_parser *)userData;
  628. if (parser) {
  629. zval retval, args[2];
  630. if (!Z_ISUNDEF(parser->characterDataHandler)) {
  631. ZVAL_COPY(&args[0], &parser->index);
  632. _xml_xmlchar_zval(s, len, parser->target_encoding, &args[1]);
  633. xml_call_handler(parser, &parser->characterDataHandler, parser->characterDataPtr, 2, args, &retval);
  634. zval_ptr_dtor(&retval);
  635. }
  636. if (!Z_ISUNDEF(parser->data)) {
  637. size_t i;
  638. int doprint = 0;
  639. zend_string *decoded_value;
  640. decoded_value = xml_utf8_decode(s, len, parser->target_encoding);
  641. if (parser->skipwhite) {
  642. for (i = 0; i < ZSTR_LEN(decoded_value); i++) {
  643. switch (ZSTR_VAL(decoded_value)[i]) {
  644. case ' ':
  645. case '\t':
  646. case '\n':
  647. continue;
  648. default:
  649. doprint = 1;
  650. break;
  651. }
  652. if (doprint) {
  653. break;
  654. }
  655. }
  656. }
  657. if (parser->lastwasopen) {
  658. zval *myval;
  659. /* check if the current tag already has a value - if yes append to that! */
  660. if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) {
  661. size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
  662. Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
  663. strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
  664. ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
  665. zend_string_release_ex(decoded_value, 0);
  666. } else {
  667. if (doprint || (! parser->skipwhite)) {
  668. add_assoc_str(parser->ctag, "value", decoded_value);
  669. } else {
  670. zend_string_release_ex(decoded_value, 0);
  671. }
  672. }
  673. } else {
  674. zval tag;
  675. zval *curtag, *mytype, *myval;
  676. ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) {
  677. if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) {
  678. if (zend_string_equals_literal(Z_STR_P(mytype), "cdata")) {
  679. if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) {
  680. size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
  681. Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
  682. strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
  683. ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
  684. zend_string_release_ex(decoded_value, 0);
  685. return;
  686. }
  687. }
  688. }
  689. break;
  690. } ZEND_HASH_FOREACH_END();
  691. if (parser->level <= XML_MAXLEVEL && parser->level > 0 && (doprint || (! parser->skipwhite))) {
  692. array_init(&tag);
  693. _xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1]));
  694. add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1]));
  695. add_assoc_str(&tag, "value", decoded_value);
  696. add_assoc_string(&tag, "type", "cdata");
  697. add_assoc_long(&tag, "level", parser->level);
  698. zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
  699. } else if (parser->level == (XML_MAXLEVEL + 1)) {
  700. php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
  701. } else {
  702. zend_string_release_ex(decoded_value, 0);
  703. }
  704. }
  705. }
  706. }
  707. }
  708. /* }}} */
  709. /* {{{ _xml_processingInstructionHandler() */
  710. void _xml_processingInstructionHandler(void *userData, const XML_Char *target, const XML_Char *data)
  711. {
  712. xml_parser *parser = (xml_parser *)userData;
  713. if (parser && !Z_ISUNDEF(parser->processingInstructionHandler)) {
  714. zval retval, args[3];
  715. ZVAL_COPY(&args[0], &parser->index);
  716. _xml_xmlchar_zval(target, 0, parser->target_encoding, &args[1]);
  717. _xml_xmlchar_zval(data, 0, parser->target_encoding, &args[2]);
  718. xml_call_handler(parser, &parser->processingInstructionHandler, parser->processingInstructionPtr, 3, args, &retval);
  719. zval_ptr_dtor(&retval);
  720. }
  721. }
  722. /* }}} */
  723. /* {{{ _xml_defaultHandler() */
  724. void _xml_defaultHandler(void *userData, const XML_Char *s, int len)
  725. {
  726. xml_parser *parser = (xml_parser *)userData;
  727. if (parser && !Z_ISUNDEF(parser->defaultHandler)) {
  728. zval retval, args[2];
  729. ZVAL_COPY(&args[0], &parser->index);
  730. _xml_xmlchar_zval(s, len, parser->target_encoding, &args[1]);
  731. xml_call_handler(parser, &parser->defaultHandler, parser->defaultPtr, 2, args, &retval);
  732. zval_ptr_dtor(&retval);
  733. }
  734. }
  735. /* }}} */
  736. /* {{{ _xml_unparsedEntityDeclHandler() */
  737. void _xml_unparsedEntityDeclHandler(void *userData,
  738. const XML_Char *entityName,
  739. const XML_Char *base,
  740. const XML_Char *systemId,
  741. const XML_Char *publicId,
  742. const XML_Char *notationName)
  743. {
  744. xml_parser *parser = (xml_parser *)userData;
  745. if (parser && !Z_ISUNDEF(parser->unparsedEntityDeclHandler)) {
  746. zval retval, args[6];
  747. ZVAL_COPY(&args[0], &parser->index);
  748. _xml_xmlchar_zval(entityName, 0, parser->target_encoding, &args[1]);
  749. _xml_xmlchar_zval(base, 0, parser->target_encoding, &args[2]);
  750. _xml_xmlchar_zval(systemId, 0, parser->target_encoding, &args[3]);
  751. _xml_xmlchar_zval(publicId, 0, parser->target_encoding, &args[4]);
  752. _xml_xmlchar_zval(notationName, 0, parser->target_encoding, &args[5]);
  753. xml_call_handler(parser, &parser->unparsedEntityDeclHandler, parser->unparsedEntityDeclPtr, 6, args, &retval);
  754. zval_ptr_dtor(&retval);
  755. }
  756. }
  757. /* }}} */
  758. /* {{{ _xml_notationDeclHandler() */
  759. void _xml_notationDeclHandler(void *userData,
  760. const XML_Char *notationName,
  761. const XML_Char *base,
  762. const XML_Char *systemId,
  763. const XML_Char *publicId)
  764. {
  765. xml_parser *parser = (xml_parser *)userData;
  766. if (parser && !Z_ISUNDEF(parser->notationDeclHandler)) {
  767. zval retval, args[5];
  768. ZVAL_COPY(&args[0], &parser->index);
  769. _xml_xmlchar_zval(notationName, 0, parser->target_encoding, &args[1]);
  770. _xml_xmlchar_zval(base, 0, parser->target_encoding, &args[2]);
  771. _xml_xmlchar_zval(systemId, 0, parser->target_encoding, &args[3]);
  772. _xml_xmlchar_zval(publicId, 0, parser->target_encoding, &args[4]);
  773. xml_call_handler(parser, &parser->notationDeclHandler, parser->notationDeclPtr, 5, args, &retval);
  774. zval_ptr_dtor(&retval);
  775. }
  776. }
  777. /* }}} */
  778. /* {{{ _xml_externalEntityRefHandler() */
  779. int _xml_externalEntityRefHandler(XML_Parser parserPtr,
  780. const XML_Char *openEntityNames,
  781. const XML_Char *base,
  782. const XML_Char *systemId,
  783. const XML_Char *publicId)
  784. {
  785. xml_parser *parser = XML_GetUserData(parserPtr);
  786. int ret = 0; /* abort if no handler is set (should be configurable?) */
  787. if (parser && !Z_ISUNDEF(parser->externalEntityRefHandler)) {
  788. zval retval, args[5];
  789. ZVAL_COPY(&args[0], &parser->index);
  790. _xml_xmlchar_zval(openEntityNames, 0, parser->target_encoding, &args[1]);
  791. _xml_xmlchar_zval(base, 0, parser->target_encoding, &args[2]);
  792. _xml_xmlchar_zval(systemId, 0, parser->target_encoding, &args[3]);
  793. _xml_xmlchar_zval(publicId, 0, parser->target_encoding, &args[4]);
  794. xml_call_handler(parser, &parser->externalEntityRefHandler, parser->externalEntityRefPtr, 5, args, &retval);
  795. if (!Z_ISUNDEF(retval)) {
  796. convert_to_long(&retval);
  797. ret = Z_LVAL(retval);
  798. } else {
  799. ret = 0;
  800. }
  801. }
  802. return ret;
  803. }
  804. /* }}} */
  805. /* {{{ _xml_startNamespaceDeclHandler() */
  806. void _xml_startNamespaceDeclHandler(void *userData,const XML_Char *prefix, const XML_Char *uri)
  807. {
  808. xml_parser *parser = (xml_parser *)userData;
  809. if (parser && !Z_ISUNDEF(parser->startNamespaceDeclHandler)) {
  810. zval retval, args[3];
  811. ZVAL_COPY(&args[0], &parser->index);
  812. _xml_xmlchar_zval(prefix, 0, parser->target_encoding, &args[1]);
  813. _xml_xmlchar_zval(uri, 0, parser->target_encoding, &args[2]);
  814. xml_call_handler(parser, &parser->startNamespaceDeclHandler, parser->startNamespaceDeclPtr, 3, args, &retval);
  815. zval_ptr_dtor(&retval);
  816. }
  817. }
  818. /* }}} */
  819. /* {{{ _xml_endNamespaceDeclHandler() */
  820. void _xml_endNamespaceDeclHandler(void *userData, const XML_Char *prefix)
  821. {
  822. xml_parser *parser = (xml_parser *)userData;
  823. if (parser && !Z_ISUNDEF(parser->endNamespaceDeclHandler)) {
  824. zval retval, args[2];
  825. ZVAL_COPY(&args[0], &parser->index);
  826. _xml_xmlchar_zval(prefix, 0, parser->target_encoding, &args[1]);
  827. xml_call_handler(parser, &parser->endNamespaceDeclHandler, parser->endNamespaceDeclPtr, 2, args, &retval);
  828. zval_ptr_dtor(&retval);
  829. }
  830. }
  831. /* }}} */
  832. /************************* EXTENSION FUNCTIONS *************************/
  833. static void php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAMETERS, int ns_support) /* {{{ */
  834. {
  835. xml_parser *parser;
  836. int auto_detect = 0;
  837. zend_string *encoding_param = NULL;
  838. char *ns_param = NULL;
  839. size_t ns_param_len = 0;
  840. XML_Char *encoding;
  841. if (zend_parse_parameters(ZEND_NUM_ARGS(), (ns_support ? "|S!s": "|S!"), &encoding_param, &ns_param, &ns_param_len) == FAILURE) {
  842. RETURN_THROWS();
  843. }
  844. if (encoding_param != NULL) {
  845. /* The supported encoding types are hardcoded here because
  846. * we are limited to the encodings supported by expat/xmltok.
  847. */
  848. if (ZSTR_LEN(encoding_param) == 0) {
  849. encoding = XML(default_encoding);
  850. auto_detect = 1;
  851. } else if (zend_string_equals_literal_ci(encoding_param, "ISO-8859-1")) {
  852. encoding = (XML_Char*)"ISO-8859-1";
  853. } else if (zend_string_equals_literal_ci(encoding_param, "UTF-8")) {
  854. encoding = (XML_Char*)"UTF-8";
  855. } else if (zend_string_equals_literal_ci(encoding_param, "US-ASCII")) {
  856. encoding = (XML_Char*)"US-ASCII";
  857. } else {
  858. zend_argument_value_error(1, "is not a supported source encoding");
  859. RETURN_THROWS();
  860. }
  861. } else {
  862. encoding = XML(default_encoding);
  863. }
  864. if (ns_support && ns_param == NULL){
  865. ns_param = ":";
  866. }
  867. object_init_ex(return_value, xml_parser_ce);
  868. parser = Z_XMLPARSER_P(return_value);
  869. parser->parser = XML_ParserCreate_MM((auto_detect ? NULL : encoding),
  870. &php_xml_mem_hdlrs, (XML_Char*)ns_param);
  871. parser->target_encoding = encoding;
  872. parser->case_folding = 1;
  873. parser->isparsing = 0;
  874. XML_SetUserData(parser->parser, parser);
  875. ZVAL_COPY_VALUE(&parser->index, return_value);
  876. }
  877. /* }}} */
  878. /* {{{ Create an XML parser */
  879. PHP_FUNCTION(xml_parser_create)
  880. {
  881. php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  882. }
  883. /* }}} */
/* {{{ Create an XML parser with namespace support */
  885. PHP_FUNCTION(xml_parser_create_ns)
  886. {
  887. php_xml_parser_create_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  888. }
  889. /* }}} */
  890. /* {{{ Set up object which should be used for callbacks */
  891. PHP_FUNCTION(xml_set_object)
  892. {
  893. xml_parser *parser;
  894. zval *pind, *mythis;
  895. if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oo", &pind, xml_parser_ce, &mythis) == FAILURE) {
  896. RETURN_THROWS();
  897. }
  898. parser = Z_XMLPARSER_P(pind);
  899. zval_ptr_dtor(&parser->object);
  900. ZVAL_OBJ_COPY(&parser->object, Z_OBJ_P(mythis));
  901. RETVAL_TRUE;
  902. }
  903. /* }}} */
  904. /* {{{ Set up start and end element handlers */
  905. PHP_FUNCTION(xml_set_element_handler)
  906. {
  907. xml_parser *parser;
  908. zval *pind, *shdl, *ehdl;
  909. if (zend_parse_parameters(ZEND_NUM_ARGS(), "Ozz", &pind, xml_parser_ce, &shdl, &ehdl) == FAILURE) {
  910. RETURN_THROWS();
  911. }
  912. parser = Z_XMLPARSER_P(pind);
  913. xml_set_handler(&parser->startElementHandler, shdl);
  914. xml_set_handler(&parser->endElementHandler, ehdl);
  915. XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
  916. RETVAL_TRUE;
  917. }
  918. /* }}} */
  919. /* {{{ Set up character data handler */
  920. PHP_FUNCTION(xml_set_character_data_handler)
  921. {
  922. xml_parser *parser;
  923. zval *pind, *hdl;
  924. if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
  925. RETURN_THROWS();
  926. }
  927. parser = Z_XMLPARSER_P(pind);
  928. xml_set_handler(&parser->characterDataHandler, hdl);
  929. XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);
  930. RETVAL_TRUE;
  931. }
  932. /* }}} */
  933. /* {{{ Set up processing instruction (PI) handler */
  934. PHP_FUNCTION(xml_set_processing_instruction_handler)
  935. {
  936. xml_parser *parser;
  937. zval *pind, *hdl;
  938. if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
  939. RETURN_THROWS();
  940. }
  941. parser = Z_XMLPARSER_P(pind);
  942. xml_set_handler(&parser->processingInstructionHandler, hdl);
  943. XML_SetProcessingInstructionHandler(parser->parser, _xml_processingInstructionHandler);
  944. RETVAL_TRUE;
  945. }
  946. /* }}} */
  947. /* {{{ Set up default handler */
  948. PHP_FUNCTION(xml_set_default_handler)
  949. {
  950. xml_parser *parser;
  951. zval *pind, *hdl;
  952. if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
  953. RETURN_THROWS();
  954. }
  955. parser = Z_XMLPARSER_P(pind);
  956. xml_set_handler(&parser->defaultHandler, hdl);
  957. XML_SetDefaultHandler(parser->parser, _xml_defaultHandler);
  958. RETVAL_TRUE;
  959. }
  960. /* }}} */
  961. /* {{{ Set up unparsed entity declaration handler */
  962. PHP_FUNCTION(xml_set_unparsed_entity_decl_handler)
  963. {
  964. xml_parser *parser;
  965. zval *pind, *hdl;
  966. if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
  967. RETURN_THROWS();
  968. }
  969. parser = Z_XMLPARSER_P(pind);
  970. xml_set_handler(&parser->unparsedEntityDeclHandler, hdl);
  971. XML_SetUnparsedEntityDeclHandler(parser->parser, _xml_unparsedEntityDeclHandler);
  972. RETVAL_TRUE;
  973. }
  974. /* }}} */
  975. /* {{{ Set up notation declaration handler */
  976. PHP_FUNCTION(xml_set_notation_decl_handler)
  977. {
  978. xml_parser *parser;
  979. zval *pind, *hdl;
  980. if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
  981. RETURN_THROWS();
  982. }
  983. parser = Z_XMLPARSER_P(pind);
  984. xml_set_handler(&parser->notationDeclHandler, hdl);
  985. XML_SetNotationDeclHandler(parser->parser, _xml_notationDeclHandler);
  986. RETVAL_TRUE;
  987. }
  988. /* }}} */
  989. /* {{{ Set up external entity reference handler */
  990. PHP_FUNCTION(xml_set_external_entity_ref_handler)
  991. {
  992. xml_parser *parser;
  993. zval *pind, *hdl;
  994. if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
  995. RETURN_THROWS();
  996. }
  997. parser = Z_XMLPARSER_P(pind);
  998. xml_set_handler(&parser->externalEntityRefHandler, hdl);
  999. XML_SetExternalEntityRefHandler(parser->parser, (void *) _xml_externalEntityRefHandler);
  1000. RETVAL_TRUE;
  1001. }
  1002. /* }}} */
/* {{{ Set up start namespace declaration handler */
  1004. PHP_FUNCTION(xml_set_start_namespace_decl_handler)
  1005. {
  1006. xml_parser *parser;
  1007. zval *pind, *hdl;
  1008. if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
  1009. RETURN_THROWS();
  1010. }
  1011. parser = Z_XMLPARSER_P(pind);
  1012. xml_set_handler(&parser->startNamespaceDeclHandler, hdl);
  1013. XML_SetStartNamespaceDeclHandler(parser->parser, _xml_startNamespaceDeclHandler);
  1014. RETVAL_TRUE;
  1015. }
  1016. /* }}} */
/* {{{ Set up end namespace declaration handler */
  1018. PHP_FUNCTION(xml_set_end_namespace_decl_handler)
  1019. {
  1020. xml_parser *parser;
  1021. zval *pind, *hdl;
  1022. if (zend_parse_parameters(ZEND_NUM_ARGS(), "Oz", &pind, xml_parser_ce, &hdl) == FAILURE) {
  1023. RETURN_THROWS();
  1024. }
  1025. parser = Z_XMLPARSER_P(pind);
  1026. xml_set_handler(&parser->endNamespaceDeclHandler, hdl);
  1027. XML_SetEndNamespaceDeclHandler(parser->parser, _xml_endNamespaceDeclHandler);
  1028. RETVAL_TRUE;
  1029. }
  1030. /* }}} */
  1031. /* {{{ Start parsing an XML document */
  1032. PHP_FUNCTION(xml_parse)
  1033. {
  1034. xml_parser *parser;
  1035. zval *pind;
  1036. char *data;
  1037. size_t data_len;
  1038. int ret;
  1039. bool isFinal = 0;
  1040. if (zend_parse_parameters(ZEND_NUM_ARGS(), "Os|b", &pind, xml_parser_ce, &data, &data_len, &isFinal) == FAILURE) {
  1041. RETURN_THROWS();
  1042. }
  1043. parser = Z_XMLPARSER_P(pind);
  1044. if (parser->isparsing) {
  1045. zend_throw_error(NULL, "Parser must not be called recursively");
  1046. RETURN_THROWS();
  1047. }
  1048. parser->isparsing = 1;
  1049. ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, isFinal);
  1050. parser->isparsing = 0;
  1051. RETVAL_LONG(ret);
  1052. }
  1053. /* }}} */
/* {{{ Parse an XML document into an array structure */
  1055. PHP_FUNCTION(xml_parse_into_struct)
  1056. {
  1057. xml_parser *parser;
  1058. zval *pind, *xdata, *info = NULL;
  1059. char *data;
  1060. size_t data_len;
  1061. int ret;
  1062. if (zend_parse_parameters(ZEND_NUM_ARGS(), "Osz|z", &pind, xml_parser_ce, &data, &data_len, &xdata, &info) == FAILURE) {
  1063. RETURN_THROWS();
  1064. }
  1065. parser = Z_XMLPARSER_P(pind);
  1066. if (info) {
  1067. info = zend_try_array_init(info);
  1068. if (!info) {
  1069. RETURN_THROWS();
  1070. }
  1071. }
  1072. xdata = zend_try_array_init(xdata);
  1073. if (!xdata) {
  1074. RETURN_THROWS();
  1075. }
  1076. ZVAL_COPY_VALUE(&parser->data, xdata);
  1077. if (info) {
  1078. ZVAL_COPY_VALUE(&parser->info, info);
  1079. }
  1080. parser->level = 0;
  1081. parser->ltags = safe_emalloc(XML_MAXLEVEL, sizeof(char *), 0);
  1082. XML_SetElementHandler(parser->parser, _xml_startElementHandler, _xml_endElementHandler);
  1083. XML_SetCharacterDataHandler(parser->parser, _xml_characterDataHandler);
  1084. if (parser->isparsing) {
  1085. php_error_docref(NULL, E_WARNING, "Parser must not be called recursively");
  1086. RETURN_FALSE;
  1087. }
  1088. parser->isparsing = 1;
  1089. ret = XML_Parse(parser->parser, (XML_Char*)data, data_len, 1);
  1090. parser->isparsing = 0;
  1091. RETVAL_LONG(ret);
  1092. }
  1093. /* }}} */
  1094. /* {{{ Get XML parser error code */
  1095. PHP_FUNCTION(xml_get_error_code)
  1096. {
  1097. xml_parser *parser;
  1098. zval *pind;
  1099. if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
  1100. RETURN_THROWS();
  1101. }
  1102. parser = Z_XMLPARSER_P(pind);
  1103. RETURN_LONG((zend_long)XML_GetErrorCode(parser->parser));
  1104. }
  1105. /* }}} */
  1106. /* {{{ Get XML parser error string */
  1107. PHP_FUNCTION(xml_error_string)
  1108. {
  1109. zend_long code;
  1110. char *str;
  1111. if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &code) == FAILURE) {
  1112. RETURN_THROWS();
  1113. }
  1114. str = (char *)XML_ErrorString((int)code);
  1115. if (str) {
  1116. RETVAL_STRING(str);
  1117. }
  1118. }
  1119. /* }}} */
  1120. /* {{{ Get current line number for an XML parser */
  1121. PHP_FUNCTION(xml_get_current_line_number)
  1122. {
  1123. xml_parser *parser;
  1124. zval *pind;
  1125. if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
  1126. RETURN_THROWS();
  1127. }
  1128. parser = Z_XMLPARSER_P(pind);
  1129. RETVAL_LONG(XML_GetCurrentLineNumber(parser->parser));
  1130. }
  1131. /* }}} */
  1132. /* {{{ Get current column number for an XML parser */
  1133. PHP_FUNCTION(xml_get_current_column_number)
  1134. {
  1135. xml_parser *parser;
  1136. zval *pind;
  1137. if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
  1138. RETURN_THROWS();
  1139. }
  1140. parser = Z_XMLPARSER_P(pind);
  1141. RETVAL_LONG(XML_GetCurrentColumnNumber(parser->parser));
  1142. }
  1143. /* }}} */
  1144. /* {{{ Get current byte index for an XML parser */
  1145. PHP_FUNCTION(xml_get_current_byte_index)
  1146. {
  1147. xml_parser *parser;
  1148. zval *pind;
  1149. if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
  1150. RETURN_THROWS();
  1151. }
  1152. parser = Z_XMLPARSER_P(pind);
  1153. RETVAL_LONG(XML_GetCurrentByteIndex(parser->parser));
  1154. }
  1155. /* }}} */
  1156. /* {{{ Free an XML parser */
  1157. PHP_FUNCTION(xml_parser_free)
  1158. {
  1159. zval *pind;
  1160. xml_parser *parser;
  1161. if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &pind, xml_parser_ce) == FAILURE) {
  1162. RETURN_THROWS();
  1163. }
  1164. parser = Z_XMLPARSER_P(pind);
  1165. if (parser->isparsing == 1) {
  1166. php_error_docref(NULL, E_WARNING, "Parser cannot be freed while it is parsing");
  1167. RETURN_FALSE;
  1168. }
  1169. RETURN_TRUE;
  1170. }
  1171. /* }}} */
  1172. /* {{{ Set options in an XML parser */
  1173. PHP_FUNCTION(xml_parser_set_option)
  1174. {
  1175. xml_parser *parser;
  1176. zval *pind, *val;
  1177. zend_long opt;
  1178. if (zend_parse_parameters(ZEND_NUM_ARGS(), "Olz", &pind, xml_parser_ce, &opt, &val) == FAILURE) {
  1179. RETURN_THROWS();
  1180. }
  1181. parser = Z_XMLPARSER_P(pind);
  1182. switch (opt) {
  1183. case PHP_XML_OPTION_CASE_FOLDING:
  1184. parser->case_folding = zval_get_long(val);
  1185. break;
  1186. case PHP_XML_OPTION_SKIP_TAGSTART:
  1187. parser->toffset = zval_get_long(val);
  1188. if (parser->toffset < 0) {
  1189. php_error_docref(NULL, E_WARNING, "tagstart ignored, because it is out of range");
  1190. parser->toffset = 0;
  1191. }
  1192. break;
  1193. case PHP_XML_OPTION_SKIP_WHITE:
  1194. parser->skipwhite = zval_get_long(val);
  1195. break;
  1196. case PHP_XML_OPTION_TARGET_ENCODING: {
  1197. const xml_encoding *enc;
  1198. if (!try_convert_to_string(val)) {
  1199. RETURN_THROWS();
  1200. }
  1201. enc = xml_get_encoding((XML_Char*)Z_STRVAL_P(val));
  1202. if (enc == NULL) {
  1203. zend_argument_value_error(3, "is not a supported target encoding");
  1204. RETURN_THROWS();
  1205. }
  1206. parser->target_encoding = enc->name;
  1207. break;
  1208. }
  1209. default:
  1210. zend_argument_value_error(2, "must be a PHP_XML_OPTION_* constant");
  1211. RETURN_THROWS();
  1212. break;
  1213. }
  1214. RETVAL_TRUE;
  1215. }
  1216. /* }}} */
  1217. /* {{{ Get options from an XML parser */
  1218. PHP_FUNCTION(xml_parser_get_option)
  1219. {
  1220. xml_parser *parser;
  1221. zval *pind;
  1222. zend_long opt;
  1223. if (zend_parse_parameters(ZEND_NUM_ARGS(), "Ol", &pind, xml_parser_ce, &opt) == FAILURE) {
  1224. RETURN_THROWS();
  1225. }
  1226. parser = Z_XMLPARSER_P(pind);
  1227. switch (opt) {
  1228. case PHP_XML_OPTION_CASE_FOLDING:
  1229. RETURN_LONG(parser->case_folding);
  1230. break;
  1231. case PHP_XML_OPTION_SKIP_TAGSTART:
  1232. RETURN_LONG(parser->toffset);
  1233. break;
  1234. case PHP_XML_OPTION_SKIP_WHITE:
  1235. RETURN_LONG(parser->skipwhite);
  1236. break;
  1237. case PHP_XML_OPTION_TARGET_ENCODING:
  1238. RETURN_STRING((char *)parser->target_encoding);
  1239. break;
  1240. default:
  1241. zend_argument_value_error(2, "must be a PHP_XML_OPTION_* constant");
  1242. RETURN_THROWS();
  1243. }
  1244. }
  1245. /* }}} */
  1246. #endif