parser.c 399 KB


  1. /*
  2. * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
  3. * implemented on top of the SAX interfaces
  4. *
  5. * References:
  6. * The XML specification:
  7. * http://www.w3.org/TR/REC-xml
  8. * Original 1.0 version:
  9. * http://www.w3.org/TR/1998/REC-xml-19980210
  10. * XML second edition working draft
  11. * http://www.w3.org/TR/2000/WD-xml-2e-20000814
  12. *
  13. * Okay this is a big file, the parser core is around 7000 lines, then it
  14. * is followed by the progressive parser top routines, then the various
  15. * high level APIs to call the parser and a few miscellaneous functions.
  16. * A number of helper functions and deprecated ones have been moved to
  17. * parserInternals.c to reduce this file size.
  18. * As much as possible the functions are associated with their relative
  19. * production in the XML specification. A few productions defining the
  20. * different character ranges are actually implemented either in
  21. * parserInternals.h or parserInternals.c
  22. * The DOM tree build is realized from the default SAX callbacks in
  23. * the module SAX.c.
  24. * The routines doing the validation checks are in valid.c and called either
  25. * from the SAX callbacks or as standalone functions using a preparsed
  26. * document.
  27. *
  28. * See Copyright for the status of this software.
  29. *
  30. * daniel@veillard.com
  31. */
  32. #define IN_LIBXML
  33. #include "libxml.h"
  34. #if defined(WIN32) && !defined (__CYGWIN__)
  35. #define XML_DIR_SEP '\\'
  36. #else
  37. #define XML_DIR_SEP '/'
  38. #endif
  39. #include <stdlib.h>
  40. #include <string.h>
  41. #include <stdarg.h>
  42. #include <libxml/xmlmemory.h>
  43. #include <libxml/threads.h>
  44. #include <libxml/globals.h>
  45. #include <libxml/tree.h>
  46. #include <libxml/parser.h>
  47. #include <libxml/parserInternals.h>
  48. #include <libxml/valid.h>
  49. #include <libxml/entities.h>
  50. #include <libxml/xmlerror.h>
  51. #include <libxml/encoding.h>
  52. #include <libxml/xmlIO.h>
  53. #include <libxml/uri.h>
  54. #ifdef LIBXML_CATALOG_ENABLED
  55. #include <libxml/catalog.h>
  56. #endif
  57. #ifdef LIBXML_SCHEMAS_ENABLED
  58. #include <libxml/xmlschemastypes.h>
  59. #include <libxml/relaxng.h>
  60. #endif
  61. #ifdef HAVE_CTYPE_H
  62. #include <ctype.h>
  63. #endif
  64. #ifdef HAVE_STDLIB_H
  65. #include <stdlib.h>
  66. #endif
  67. #ifdef HAVE_SYS_STAT_H
  68. #include <sys/stat.h>
  69. #endif
  70. #ifdef HAVE_FCNTL_H
  71. #include <fcntl.h>
  72. #endif
  73. #ifdef HAVE_UNISTD_H
  74. #include <unistd.h>
  75. #endif
  76. #ifdef HAVE_ZLIB_H
  77. #include <zlib.h>
  78. #endif
  79. static void
  80. xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
  81. static xmlParserCtxtPtr
  82. xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
  83. const xmlChar *base, xmlParserCtxtPtr pctx);
  84. /************************************************************************
  85. * *
  86. * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
  87. * *
  88. ************************************************************************/
  /*
   * Size thresholds for entity replacement text.
   * xmlParserEntityCheck() skips its ratio test for replacement sizes
   * below XML_PARSER_BIG_ENTITY.
   */
  #define XML_PARSER_BIG_ENTITY   1000
  #define XML_PARSER_LOT_ENTITY   5000

  /*
   * XML_PARSER_NON_LINEAR is the threshold on the ratio between the amount
   * of parsed entity replacement and the size in bytes of the input; going
   * past it indicates an exponential behaviour. A value of 10 corresponds
   * to at least 3 entity replacements per byte of input.
   */
  #define XML_PARSER_NON_LINEAR   10
  98. /*
  99. * xmlParserEntityCheck
  100. *
  101. * Function to check non-linear entity expansion behaviour
  102. * This is here to detect and stop exponential linear entity expansion
  103. * This is not a limitation of the parser but a safety
  104. * boundary feature. It can be disabled with the XML_PARSE_HUGE
  105. * parser option.
  106. */
  107. static int
  108. xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
  109. xmlEntityPtr ent)
  110. {
  111. unsigned long consumed = 0;
  112. if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
  113. return (0);
  114. if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
  115. return (1);
  116. if (size != 0) {
  117. /*
  118. * Do the check based on the replacement size of the entity
  119. */
  120. if (size < XML_PARSER_BIG_ENTITY)
  121. return(0);
  122. /*
  123. * A limit on the amount of text data reasonably used
  124. */
  125. if (ctxt->input != NULL) {
  126. consumed = ctxt->input->consumed +
  127. (ctxt->input->cur - ctxt->input->base);
  128. }
  129. consumed += ctxt->sizeentities;
  130. if ((size < XML_PARSER_NON_LINEAR * consumed) &&
  131. (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
  132. return (0);
  133. } else if (ent != NULL) {
  134. /*
  135. * use the number of parsed entities in the replacement
  136. */
  137. size = ent->checked;
  138. /*
  139. * The amount of data parsed counting entities size only once
  140. */
  141. if (ctxt->input != NULL) {
  142. consumed = ctxt->input->consumed +
  143. (ctxt->input->cur - ctxt->input->base);
  144. }
  145. consumed += ctxt->sizeentities;
  146. /*
  147. * Check the density of entities for the amount of data
  148. * knowing an entity reference will take at least 3 bytes
  149. */
  150. if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
  151. return (0);
  152. } else {
  153. /*
  154. * strange we got no data for checking just return
  155. */
  156. return (0);
  157. }
  158. xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
  159. return (1);
  160. }
  /**
   * xmlParserMaxDepth:
   *
   * Arbitrary limit on the depth of the XML documents that the parser
   * accepts to process; the default is 256. It is a safety boundary
   * rather than a limitation of the parser, and it can be disabled with
   * the XML_PARSE_HUGE parser option.
   */
  unsigned int xmlParserMaxDepth = 256;
  /* Compile-time switch and buffer-size constants used by the parser. */
  #define SAX2                        1
  #define XML_PARSER_BIG_BUFFER_SIZE  300
  #define XML_PARSER_BUFFER_SIZE      100

  /* Marker string for documents built in SAX compatibility mode. */
  #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
  /*
   * NULL-terminated list of the processing instruction targets starting
   * with "xml" that are allowed by the W3C specifications.
   */
  static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
  181. /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
  182. static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
  183. const xmlChar **str);
  184. static xmlParserErrors
  185. xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
  186. xmlSAXHandlerPtr sax,
  187. void *user_data, int depth, const xmlChar *URL,
  188. const xmlChar *ID, xmlNodePtr *list);
  189. static int
  190. xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
  191. const char *encoding);
  192. #ifdef LIBXML_LEGACY_ENABLED
  193. static void
  194. xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
  195. xmlNodePtr lastNode);
  196. #endif /* LIBXML_LEGACY_ENABLED */
  197. static xmlParserErrors
  198. xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
  199. const xmlChar *string, void *user_data, xmlNodePtr *lst);
  200. static int
  201. xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
  202. /************************************************************************
  203. * *
  204. * Some factorized error routines *
  205. * *
  206. ************************************************************************/
  207. /**
  208. * xmlErrAttributeDup:
  209. * @ctxt: an XML parser context
  210. * @prefix: the attribute prefix
  211. * @localname: the attribute localname
  212. *
  213. * Handle a redefinition of attribute error
  214. */
  215. static void
  216. xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
  217. const xmlChar * localname)
  218. {
  219. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  220. (ctxt->instate == XML_PARSER_EOF))
  221. return;
  222. if (ctxt != NULL)
  223. ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
  224. if (prefix == NULL)
  225. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
  226. XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
  227. (const char *) localname, NULL, NULL, 0, 0,
  228. "Attribute %s redefined\n", localname);
  229. else
  230. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
  231. XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
  232. (const char *) prefix, (const char *) localname,
  233. NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
  234. localname);
  235. if (ctxt != NULL) {
  236. ctxt->wellFormed = 0;
  237. if (ctxt->recovery == 0)
  238. ctxt->disableSAX = 1;
  239. }
  240. }
  241. /**
  242. * xmlFatalErr:
  243. * @ctxt: an XML parser context
  244. * @error: the error number
  245. * @extra: extra information string
  246. *
  247. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  248. */
  249. static void
  250. xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
  251. {
  252. const char *errmsg;
  253. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  254. (ctxt->instate == XML_PARSER_EOF))
  255. return;
  256. switch (error) {
  257. case XML_ERR_INVALID_HEX_CHARREF:
  258. errmsg = "CharRef: invalid hexadecimal value\n";
  259. break;
  260. case XML_ERR_INVALID_DEC_CHARREF:
  261. errmsg = "CharRef: invalid decimal value\n";
  262. break;
  263. case XML_ERR_INVALID_CHARREF:
  264. errmsg = "CharRef: invalid value\n";
  265. break;
  266. case XML_ERR_INTERNAL_ERROR:
  267. errmsg = "internal error";
  268. break;
  269. case XML_ERR_PEREF_AT_EOF:
  270. errmsg = "PEReference at end of document\n";
  271. break;
  272. case XML_ERR_PEREF_IN_PROLOG:
  273. errmsg = "PEReference in prolog\n";
  274. break;
  275. case XML_ERR_PEREF_IN_EPILOG:
  276. errmsg = "PEReference in epilog\n";
  277. break;
  278. case XML_ERR_PEREF_NO_NAME:
  279. errmsg = "PEReference: no name\n";
  280. break;
  281. case XML_ERR_PEREF_SEMICOL_MISSING:
  282. errmsg = "PEReference: expecting ';'\n";
  283. break;
  284. case XML_ERR_ENTITY_LOOP:
  285. errmsg = "Detected an entity reference loop\n";
  286. break;
  287. case XML_ERR_ENTITY_NOT_STARTED:
  288. errmsg = "EntityValue: \" or ' expected\n";
  289. break;
  290. case XML_ERR_ENTITY_PE_INTERNAL:
  291. errmsg = "PEReferences forbidden in internal subset\n";
  292. break;
  293. case XML_ERR_ENTITY_NOT_FINISHED:
  294. errmsg = "EntityValue: \" or ' expected\n";
  295. break;
  296. case XML_ERR_ATTRIBUTE_NOT_STARTED:
  297. errmsg = "AttValue: \" or ' expected\n";
  298. break;
  299. case XML_ERR_LT_IN_ATTRIBUTE:
  300. errmsg = "Unescaped '<' not allowed in attributes values\n";
  301. break;
  302. case XML_ERR_LITERAL_NOT_STARTED:
  303. errmsg = "SystemLiteral \" or ' expected\n";
  304. break;
  305. case XML_ERR_LITERAL_NOT_FINISHED:
  306. errmsg = "Unfinished System or Public ID \" or ' expected\n";
  307. break;
  308. case XML_ERR_MISPLACED_CDATA_END:
  309. errmsg = "Sequence ']]>' not allowed in content\n";
  310. break;
  311. case XML_ERR_URI_REQUIRED:
  312. errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
  313. break;
  314. case XML_ERR_PUBID_REQUIRED:
  315. errmsg = "PUBLIC, the Public Identifier is missing\n";
  316. break;
  317. case XML_ERR_HYPHEN_IN_COMMENT:
  318. errmsg = "Comment must not contain '--' (double-hyphen)\n";
  319. break;
  320. case XML_ERR_PI_NOT_STARTED:
  321. errmsg = "xmlParsePI : no target name\n";
  322. break;
  323. case XML_ERR_RESERVED_XML_NAME:
  324. errmsg = "Invalid PI name\n";
  325. break;
  326. case XML_ERR_NOTATION_NOT_STARTED:
  327. errmsg = "NOTATION: Name expected here\n";
  328. break;
  329. case XML_ERR_NOTATION_NOT_FINISHED:
  330. errmsg = "'>' required to close NOTATION declaration\n";
  331. break;
  332. case XML_ERR_VALUE_REQUIRED:
  333. errmsg = "Entity value required\n";
  334. break;
  335. case XML_ERR_URI_FRAGMENT:
  336. errmsg = "Fragment not allowed";
  337. break;
  338. case XML_ERR_ATTLIST_NOT_STARTED:
  339. errmsg = "'(' required to start ATTLIST enumeration\n";
  340. break;
  341. case XML_ERR_NMTOKEN_REQUIRED:
  342. errmsg = "NmToken expected in ATTLIST enumeration\n";
  343. break;
  344. case XML_ERR_ATTLIST_NOT_FINISHED:
  345. errmsg = "')' required to finish ATTLIST enumeration\n";
  346. break;
  347. case XML_ERR_MIXED_NOT_STARTED:
  348. errmsg = "MixedContentDecl : '|' or ')*' expected\n";
  349. break;
  350. case XML_ERR_PCDATA_REQUIRED:
  351. errmsg = "MixedContentDecl : '#PCDATA' expected\n";
  352. break;
  353. case XML_ERR_ELEMCONTENT_NOT_STARTED:
  354. errmsg = "ContentDecl : Name or '(' expected\n";
  355. break;
  356. case XML_ERR_ELEMCONTENT_NOT_FINISHED:
  357. errmsg = "ContentDecl : ',' '|' or ')' expected\n";
  358. break;
  359. case XML_ERR_PEREF_IN_INT_SUBSET:
  360. errmsg =
  361. "PEReference: forbidden within markup decl in internal subset\n";
  362. break;
  363. case XML_ERR_GT_REQUIRED:
  364. errmsg = "expected '>'\n";
  365. break;
  366. case XML_ERR_CONDSEC_INVALID:
  367. errmsg = "XML conditional section '[' expected\n";
  368. break;
  369. case XML_ERR_EXT_SUBSET_NOT_FINISHED:
  370. errmsg = "Content error in the external subset\n";
  371. break;
  372. case XML_ERR_CONDSEC_INVALID_KEYWORD:
  373. errmsg =
  374. "conditional section INCLUDE or IGNORE keyword expected\n";
  375. break;
  376. case XML_ERR_CONDSEC_NOT_FINISHED:
  377. errmsg = "XML conditional section not closed\n";
  378. break;
  379. case XML_ERR_XMLDECL_NOT_STARTED:
  380. errmsg = "Text declaration '<?xml' required\n";
  381. break;
  382. case XML_ERR_XMLDECL_NOT_FINISHED:
  383. errmsg = "parsing XML declaration: '?>' expected\n";
  384. break;
  385. case XML_ERR_EXT_ENTITY_STANDALONE:
  386. errmsg = "external parsed entities cannot be standalone\n";
  387. break;
  388. case XML_ERR_ENTITYREF_SEMICOL_MISSING:
  389. errmsg = "EntityRef: expecting ';'\n";
  390. break;
  391. case XML_ERR_DOCTYPE_NOT_FINISHED:
  392. errmsg = "DOCTYPE improperly terminated\n";
  393. break;
  394. case XML_ERR_LTSLASH_REQUIRED:
  395. errmsg = "EndTag: '</' not found\n";
  396. break;
  397. case XML_ERR_EQUAL_REQUIRED:
  398. errmsg = "expected '='\n";
  399. break;
  400. case XML_ERR_STRING_NOT_CLOSED:
  401. errmsg = "String not closed expecting \" or '\n";
  402. break;
  403. case XML_ERR_STRING_NOT_STARTED:
  404. errmsg = "String not started expecting ' or \"\n";
  405. break;
  406. case XML_ERR_ENCODING_NAME:
  407. errmsg = "Invalid XML encoding name\n";
  408. break;
  409. case XML_ERR_STANDALONE_VALUE:
  410. errmsg = "standalone accepts only 'yes' or 'no'\n";
  411. break;
  412. case XML_ERR_DOCUMENT_EMPTY:
  413. errmsg = "Document is empty\n";
  414. break;
  415. case XML_ERR_DOCUMENT_END:
  416. errmsg = "Extra content at the end of the document\n";
  417. break;
  418. case XML_ERR_NOT_WELL_BALANCED:
  419. errmsg = "chunk is not well balanced\n";
  420. break;
  421. case XML_ERR_EXTRA_CONTENT:
  422. errmsg = "extra content at the end of well balanced chunk\n";
  423. break;
  424. case XML_ERR_VERSION_MISSING:
  425. errmsg = "Malformed declaration expecting version\n";
  426. break;
  427. #if 0
  428. case:
  429. errmsg = "\n";
  430. break;
  431. #endif
  432. default:
  433. errmsg = "Unregistered error message\n";
  434. }
  435. if (ctxt != NULL)
  436. ctxt->errNo = error;
  437. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
  438. XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
  439. info);
  440. if (ctxt != NULL) {
  441. ctxt->wellFormed = 0;
  442. if (ctxt->recovery == 0)
  443. ctxt->disableSAX = 1;
  444. }
  445. }
  446. /**
  447. * xmlFatalErrMsg:
  448. * @ctxt: an XML parser context
  449. * @error: the error number
  450. * @msg: the error message
  451. *
  452. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  453. */
  454. static void
  455. xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  456. const char *msg)
  457. {
  458. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  459. (ctxt->instate == XML_PARSER_EOF))
  460. return;
  461. if (ctxt != NULL)
  462. ctxt->errNo = error;
  463. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
  464. XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
  465. if (ctxt != NULL) {
  466. ctxt->wellFormed = 0;
  467. if (ctxt->recovery == 0)
  468. ctxt->disableSAX = 1;
  469. }
  470. }
  471. /**
  472. * xmlWarningMsg:
  473. * @ctxt: an XML parser context
  474. * @error: the error number
  475. * @msg: the error message
  476. * @str1: extra data
  477. * @str2: extra data
  478. *
  479. * Handle a warning.
  480. */
  481. static void
  482. xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  483. const char *msg, const xmlChar *str1, const xmlChar *str2)
  484. {
  485. xmlStructuredErrorFunc schannel = NULL;
  486. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  487. (ctxt->instate == XML_PARSER_EOF))
  488. return;
  489. if ((ctxt != NULL) && (ctxt->sax != NULL) &&
  490. (ctxt->sax->initialized == XML_SAX2_MAGIC))
  491. schannel = ctxt->sax->serror;
  492. if (ctxt != NULL) {
  493. __xmlRaiseError(schannel,
  494. (ctxt->sax) ? ctxt->sax->warning : NULL,
  495. ctxt->userData,
  496. ctxt, NULL, XML_FROM_PARSER, error,
  497. XML_ERR_WARNING, NULL, 0,
  498. (const char *) str1, (const char *) str2, NULL, 0, 0,
  499. msg, (const char *) str1, (const char *) str2);
  500. } else {
  501. __xmlRaiseError(schannel, NULL, NULL,
  502. ctxt, NULL, XML_FROM_PARSER, error,
  503. XML_ERR_WARNING, NULL, 0,
  504. (const char *) str1, (const char *) str2, NULL, 0, 0,
  505. msg, (const char *) str1, (const char *) str2);
  506. }
  507. }
  508. /**
  509. * xmlValidityError:
  510. * @ctxt: an XML parser context
  511. * @error: the error number
  512. * @msg: the error message
  513. * @str1: extra data
  514. *
  515. * Handle a validity error.
  516. */
  517. static void
  518. xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  519. const char *msg, const xmlChar *str1, const xmlChar *str2)
  520. {
  521. xmlStructuredErrorFunc schannel = NULL;
  522. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  523. (ctxt->instate == XML_PARSER_EOF))
  524. return;
  525. if (ctxt != NULL) {
  526. ctxt->errNo = error;
  527. if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
  528. schannel = ctxt->sax->serror;
  529. }
  530. if (ctxt != NULL) {
  531. __xmlRaiseError(schannel,
  532. ctxt->vctxt.error, ctxt->vctxt.userData,
  533. ctxt, NULL, XML_FROM_DTD, error,
  534. XML_ERR_ERROR, NULL, 0, (const char *) str1,
  535. (const char *) str2, NULL, 0, 0,
  536. msg, (const char *) str1, (const char *) str2);
  537. ctxt->valid = 0;
  538. } else {
  539. __xmlRaiseError(schannel, NULL, NULL,
  540. ctxt, NULL, XML_FROM_DTD, error,
  541. XML_ERR_ERROR, NULL, 0, (const char *) str1,
  542. (const char *) str2, NULL, 0, 0,
  543. msg, (const char *) str1, (const char *) str2);
  544. }
  545. }
  546. /**
  547. * xmlFatalErrMsgInt:
  548. * @ctxt: an XML parser context
  549. * @error: the error number
  550. * @msg: the error message
  551. * @val: an integer value
  552. *
  553. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  554. */
  555. static void
  556. xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  557. const char *msg, int val)
  558. {
  559. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  560. (ctxt->instate == XML_PARSER_EOF))
  561. return;
  562. if (ctxt != NULL)
  563. ctxt->errNo = error;
  564. __xmlRaiseError(NULL, NULL, NULL,
  565. ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
  566. NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
  567. if (ctxt != NULL) {
  568. ctxt->wellFormed = 0;
  569. if (ctxt->recovery == 0)
  570. ctxt->disableSAX = 1;
  571. }
  572. }
  573. /**
  574. * xmlFatalErrMsgStrIntStr:
  575. * @ctxt: an XML parser context
  576. * @error: the error number
  577. * @msg: the error message
  578. * @str1: an string info
  579. * @val: an integer value
  580. * @str2: an string info
  581. *
  582. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  583. */
  584. static void
  585. xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  586. const char *msg, const xmlChar *str1, int val,
  587. const xmlChar *str2)
  588. {
  589. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  590. (ctxt->instate == XML_PARSER_EOF))
  591. return;
  592. if (ctxt != NULL)
  593. ctxt->errNo = error;
  594. __xmlRaiseError(NULL, NULL, NULL,
  595. ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
  596. NULL, 0, (const char *) str1, (const char *) str2,
  597. NULL, val, 0, msg, str1, val, str2);
  598. if (ctxt != NULL) {
  599. ctxt->wellFormed = 0;
  600. if (ctxt->recovery == 0)
  601. ctxt->disableSAX = 1;
  602. }
  603. }
  604. /**
  605. * xmlFatalErrMsgStr:
  606. * @ctxt: an XML parser context
  607. * @error: the error number
  608. * @msg: the error message
  609. * @val: a string value
  610. *
  611. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  612. */
  613. static void
  614. xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  615. const char *msg, const xmlChar * val)
  616. {
  617. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  618. (ctxt->instate == XML_PARSER_EOF))
  619. return;
  620. if (ctxt != NULL)
  621. ctxt->errNo = error;
  622. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
  623. XML_FROM_PARSER, error, XML_ERR_FATAL,
  624. NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
  625. val);
  626. if (ctxt != NULL) {
  627. ctxt->wellFormed = 0;
  628. if (ctxt->recovery == 0)
  629. ctxt->disableSAX = 1;
  630. }
  631. }
  632. /**
  633. * xmlErrMsgStr:
  634. * @ctxt: an XML parser context
  635. * @error: the error number
  636. * @msg: the error message
  637. * @val: a string value
  638. *
  639. * Handle a non fatal parser error
  640. */
  641. static void
  642. xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  643. const char *msg, const xmlChar * val)
  644. {
  645. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  646. (ctxt->instate == XML_PARSER_EOF))
  647. return;
  648. if (ctxt != NULL)
  649. ctxt->errNo = error;
  650. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
  651. XML_FROM_PARSER, error, XML_ERR_ERROR,
  652. NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
  653. val);
  654. }
  655. /**
  656. * xmlNsErr:
  657. * @ctxt: an XML parser context
  658. * @error: the error number
  659. * @msg: the message
  660. * @info1: extra information string
  661. * @info2: extra information string
  662. *
  663. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  664. */
  665. static void
  666. xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  667. const char *msg,
  668. const xmlChar * info1, const xmlChar * info2,
  669. const xmlChar * info3)
  670. {
  671. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  672. (ctxt->instate == XML_PARSER_EOF))
  673. return;
  674. if (ctxt != NULL)
  675. ctxt->errNo = error;
  676. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
  677. XML_ERR_ERROR, NULL, 0, (const char *) info1,
  678. (const char *) info2, (const char *) info3, 0, 0, msg,
  679. info1, info2, info3);
  680. if (ctxt != NULL)
  681. ctxt->nsWellFormed = 0;
  682. }
  683. /**
  684. * xmlNsWarn
  685. * @ctxt: an XML parser context
  686. * @error: the error number
  687. * @msg: the message
  688. * @info1: extra information string
  689. * @info2: extra information string
  690. *
  691. * Handle a fatal parser error, i.e. violating Well-Formedness constraints
  692. */
  693. static void
  694. xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
  695. const char *msg,
  696. const xmlChar * info1, const xmlChar * info2,
  697. const xmlChar * info3)
  698. {
  699. if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
  700. (ctxt->instate == XML_PARSER_EOF))
  701. return;
  702. __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
  703. XML_ERR_WARNING, NULL, 0, (const char *) info1,
  704. (const char *) info2, (const char *) info3, 0, 0, msg,
  705. info1, info2, info3);
  706. }
  707. /************************************************************************
  708. * *
  709. * Library wide options *
  710. * *
  711. ************************************************************************/
  712. /**
  713. * xmlHasFeature:
  714. * @feature: the feature to be examined
  715. *
  716. * Examines if the library has been compiled with a given feature.
  717. *
  718. * Returns a non-zero value if the feature exist, otherwise zero.
  719. * Returns zero (0) if the feature does not exist or an unknown
  720. * unknown feature is requested, non-zero otherwise.
  721. */
  722. int
  723. xmlHasFeature(xmlFeature feature)
  724. {
  725. switch (feature) {
  726. case XML_WITH_THREAD:
  727. #ifdef LIBXML_THREAD_ENABLED
  728. return(1);
  729. #else
  730. return(0);
  731. #endif
  732. case XML_WITH_TREE:
  733. #ifdef LIBXML_TREE_ENABLED
  734. return(1);
  735. #else
  736. return(0);
  737. #endif
  738. case XML_WITH_OUTPUT:
  739. #ifdef LIBXML_OUTPUT_ENABLED
  740. return(1);
  741. #else
  742. return(0);
  743. #endif
  744. case XML_WITH_PUSH:
  745. #ifdef LIBXML_PUSH_ENABLED
  746. return(1);
  747. #else
  748. return(0);
  749. #endif
  750. case XML_WITH_READER:
  751. #ifdef LIBXML_READER_ENABLED
  752. return(1);
  753. #else
  754. return(0);
  755. #endif
  756. case XML_WITH_PATTERN:
  757. #ifdef LIBXML_PATTERN_ENABLED
  758. return(1);
  759. #else
  760. return(0);
  761. #endif
  762. case XML_WITH_WRITER:
  763. #ifdef LIBXML_WRITER_ENABLED
  764. return(1);
  765. #else
  766. return(0);
  767. #endif
  768. case XML_WITH_SAX1:
  769. #ifdef LIBXML_SAX1_ENABLED
  770. return(1);
  771. #else
  772. return(0);
  773. #endif
  774. case XML_WITH_FTP:
  775. #ifdef LIBXML_FTP_ENABLED
  776. return(1);
  777. #else
  778. return(0);
  779. #endif
  780. case XML_WITH_HTTP:
  781. #ifdef LIBXML_HTTP_ENABLED
  782. return(1);
  783. #else
  784. return(0);
  785. #endif
  786. case XML_WITH_VALID:
  787. #ifdef LIBXML_VALID_ENABLED
  788. return(1);
  789. #else
  790. return(0);
  791. #endif
  792. case XML_WITH_HTML:
  793. #ifdef LIBXML_HTML_ENABLED
  794. return(1);
  795. #else
  796. return(0);
  797. #endif
  798. case XML_WITH_LEGACY:
  799. #ifdef LIBXML_LEGACY_ENABLED
  800. return(1);
  801. #else
  802. return(0);
  803. #endif
  804. case XML_WITH_C14N:
  805. #ifdef LIBXML_C14N_ENABLED
  806. return(1);
  807. #else
  808. return(0);
  809. #endif
  810. case XML_WITH_CATALOG:
  811. #ifdef LIBXML_CATALOG_ENABLED
  812. return(1);
  813. #else
  814. return(0);
  815. #endif
  816. case XML_WITH_XPATH:
  817. #ifdef LIBXML_XPATH_ENABLED
  818. return(1);
  819. #else
  820. return(0);
  821. #endif
  822. case XML_WITH_XPTR:
  823. #ifdef LIBXML_XPTR_ENABLED
  824. return(1);
  825. #else
  826. return(0);
  827. #endif
  828. case XML_WITH_XINCLUDE:
  829. #ifdef LIBXML_XINCLUDE_ENABLED
  830. return(1);
  831. #else
  832. return(0);
  833. #endif
  834. case XML_WITH_ICONV:
  835. #ifdef LIBXML_ICONV_ENABLED
  836. return(1);
  837. #else
  838. return(0);
  839. #endif
  840. case XML_WITH_ISO8859X:
  841. #ifdef LIBXML_ISO8859X_ENABLED
  842. return(1);
  843. #else
  844. return(0);
  845. #endif
  846. case XML_WITH_UNICODE:
  847. #ifdef LIBXML_UNICODE_ENABLED
  848. return(1);
  849. #else
  850. return(0);
  851. #endif
  852. case XML_WITH_REGEXP:
  853. #ifdef LIBXML_REGEXP_ENABLED
  854. return(1);
  855. #else
  856. return(0);
  857. #endif
  858. case XML_WITH_AUTOMATA:
  859. #ifdef LIBXML_AUTOMATA_ENABLED
  860. return(1);
  861. #else
  862. return(0);
  863. #endif
  864. case XML_WITH_EXPR:
  865. #ifdef LIBXML_EXPR_ENABLED
  866. return(1);
  867. #else
  868. return(0);
  869. #endif
  870. case XML_WITH_SCHEMAS:
  871. #ifdef LIBXML_SCHEMAS_ENABLED
  872. return(1);
  873. #else
  874. return(0);
  875. #endif
  876. case XML_WITH_SCHEMATRON:
  877. #ifdef LIBXML_SCHEMATRON_ENABLED
  878. return(1);
  879. #else
  880. return(0);
  881. #endif
  882. case XML_WITH_MODULES:
  883. #ifdef LIBXML_MODULES_ENABLED
  884. return(1);
  885. #else
  886. return(0);
  887. #endif
  888. case XML_WITH_DEBUG:
  889. #ifdef LIBXML_DEBUG_ENABLED
  890. return(1);
  891. #else
  892. return(0);
  893. #endif
  894. case XML_WITH_DEBUG_MEM:
  895. #ifdef DEBUG_MEMORY_LOCATION
  896. return(1);
  897. #else
  898. return(0);
  899. #endif
  900. case XML_WITH_DEBUG_RUN:
  901. #ifdef LIBXML_DEBUG_RUNTIME
  902. return(1);
  903. #else
  904. return(0);
  905. #endif
  906. case XML_WITH_ZLIB:
  907. #ifdef LIBXML_ZLIB_ENABLED
  908. return(1);
  909. #else
  910. return(0);
  911. #endif
  912. default:
  913. break;
  914. }
  915. return(0);
  916. }
  917. /************************************************************************
  918. * *
  919. * SAX2 defaulted attributes handling *
  920. * *
  921. ************************************************************************/
  922. /**
  923. * xmlDetectSAX2:
  924. * @ctxt: an XML parser context
  925. *
  926. * Do the SAX2 detection and specific intialization
  927. */
  928. static void
  929. xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
  930. if (ctxt == NULL) return;
  931. #ifdef LIBXML_SAX1_ENABLED
  932. if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
  933. ((ctxt->sax->startElementNs != NULL) ||
  934. (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
  935. #else
  936. ctxt->sax2 = 1;
  937. #endif /* LIBXML_SAX1_ENABLED */
  938. ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
  939. ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
  940. ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
  941. if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
  942. (ctxt->str_xml_ns == NULL)) {
  943. xmlErrMemory(ctxt, NULL);
  944. }
  945. }
  946. typedef struct _xmlDefAttrs xmlDefAttrs;
  947. typedef xmlDefAttrs *xmlDefAttrsPtr;
  948. struct _xmlDefAttrs {
  949. int nbAttrs; /* number of defaulted attributes on that element */
  950. int maxAttrs; /* the size of the array */
  951. const xmlChar *values[5]; /* array of localname/prefix/values/external */
  952. };
  953. /**
  954. * xmlAttrNormalizeSpace:
  955. * @src: the source string
  956. * @dst: the target string
  957. *
  958. * Normalize the space in non CDATA attribute values:
  959. * If the attribute type is not CDATA, then the XML processor MUST further
  960. * process the normalized attribute value by discarding any leading and
  961. * trailing space (#x20) characters, and by replacing sequences of space
  962. * (#x20) characters by a single space (#x20) character.
  963. * Note that the size of dst need to be at least src, and if one doesn't need
  964. * to preserve dst (and it doesn't come from a dictionary or read-only) then
  965. * passing src as dst is just fine.
  966. *
  967. * Returns a pointer to the normalized value (dst) or NULL if no conversion
  968. * is needed.
  969. */
  970. static xmlChar *
  971. xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
  972. {
  973. if ((src == NULL) || (dst == NULL))
  974. return(NULL);
  975. while (*src == 0x20) src++;
  976. while (*src != 0) {
  977. if (*src == 0x20) {
  978. while (*src == 0x20) src++;
  979. if (*src != 0)
  980. *dst++ = 0x20;
  981. } else {
  982. *dst++ = *src++;
  983. }
  984. }
  985. *dst = 0;
  986. if (dst == src)
  987. return(NULL);
  988. return(dst);
  989. }
  990. /**
  991. * xmlAttrNormalizeSpace2:
  992. * @src: the source string
  993. *
  994. * Normalize the space in non CDATA attribute values, a slightly more complex
  995. * front end to avoid allocation problems when running on attribute values
  996. * coming from the input.
  997. *
  998. * Returns a pointer to the normalized value (dst) or NULL if no conversion
  999. * is needed.
  1000. */
  1001. static const xmlChar *
  1002. xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
  1003. {
  1004. int i;
  1005. int remove_head = 0;
  1006. int need_realloc = 0;
  1007. const xmlChar *cur;
  1008. if ((ctxt == NULL) || (src == NULL) || (len == NULL))
  1009. return(NULL);
  1010. i = *len;
  1011. if (i <= 0)
  1012. return(NULL);
  1013. cur = src;
  1014. while (*cur == 0x20) {
  1015. cur++;
  1016. remove_head++;
  1017. }
  1018. while (*cur != 0) {
  1019. if (*cur == 0x20) {
  1020. cur++;
  1021. if ((*cur == 0x20) || (*cur == 0)) {
  1022. need_realloc = 1;
  1023. break;
  1024. }
  1025. } else
  1026. cur++;
  1027. }
  1028. if (need_realloc) {
  1029. xmlChar *ret;
  1030. ret = xmlStrndup(src + remove_head, i - remove_head + 1);
  1031. if (ret == NULL) {
  1032. xmlErrMemory(ctxt, NULL);
  1033. return(NULL);
  1034. }
  1035. xmlAttrNormalizeSpace(ret, ret);
  1036. *len = (int) strlen((const char *)ret);
  1037. return(ret);
  1038. } else if (remove_head) {
  1039. *len -= remove_head;
  1040. memmove(src, src + remove_head, 1 + *len);
  1041. return(src);
  1042. }
  1043. return(NULL);
  1044. }
  1045. /**
  1046. * xmlAddDefAttrs:
  1047. * @ctxt: an XML parser context
  1048. * @fullname: the element fullname
  1049. * @fullattr: the attribute fullname
  1050. * @value: the attribute value
  1051. *
  1052. * Add a defaulted attribute for an element
  1053. */
  1054. static void
  1055. xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
  1056. const xmlChar *fullname,
  1057. const xmlChar *fullattr,
  1058. const xmlChar *value) {
  1059. xmlDefAttrsPtr defaults;
  1060. int len;
  1061. const xmlChar *name;
  1062. const xmlChar *prefix;
  1063. /*
  1064. * Allows to detect attribute redefinitions
  1065. */
  1066. if (ctxt->attsSpecial != NULL) {
  1067. if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
  1068. return;
  1069. }
  1070. if (ctxt->attsDefault == NULL) {
  1071. ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
  1072. if (ctxt->attsDefault == NULL)
  1073. goto mem_error;
  1074. }
  1075. /*
  1076. * split the element name into prefix:localname , the string found
  1077. * are within the DTD and then not associated to namespace names.
  1078. */
  1079. name = xmlSplitQName3(fullname, &len);
  1080. if (name == NULL) {
  1081. name = xmlDictLookup(ctxt->dict, fullname, -1);
  1082. prefix = NULL;
  1083. } else {
  1084. name = xmlDictLookup(ctxt->dict, name, -1);
  1085. prefix = xmlDictLookup(ctxt->dict, fullname, len);
  1086. }
  1087. /*
  1088. * make sure there is some storage
  1089. */
  1090. defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
  1091. if (defaults == NULL) {
  1092. defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
  1093. (4 * 5) * sizeof(const xmlChar *));
  1094. if (defaults == NULL)
  1095. goto mem_error;
  1096. defaults->nbAttrs = 0;
  1097. defaults->maxAttrs = 4;
  1098. if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
  1099. defaults, NULL) < 0) {
  1100. xmlFree(defaults);
  1101. goto mem_error;
  1102. }
  1103. } else if (defaults->nbAttrs >= defaults->maxAttrs) {
  1104. xmlDefAttrsPtr temp;
  1105. temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
  1106. (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
  1107. if (temp == NULL)
  1108. goto mem_error;
  1109. defaults = temp;
  1110. defaults->maxAttrs *= 2;
  1111. if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
  1112. defaults, NULL) < 0) {
  1113. xmlFree(defaults);
  1114. goto mem_error;
  1115. }
  1116. }
  1117. /*
  1118. * Split the element name into prefix:localname , the string found
  1119. * are within the DTD and hen not associated to namespace names.
  1120. */
  1121. name = xmlSplitQName3(fullattr, &len);
  1122. if (name == NULL) {
  1123. name = xmlDictLookup(ctxt->dict, fullattr, -1);
  1124. prefix = NULL;
  1125. } else {
  1126. name = xmlDictLookup(ctxt->dict, name, -1);
  1127. prefix = xmlDictLookup(ctxt->dict, fullattr, len);
  1128. }
  1129. defaults->values[5 * defaults->nbAttrs] = name;
  1130. defaults->values[5 * defaults->nbAttrs + 1] = prefix;
  1131. /* intern the string and precompute the end */
  1132. len = xmlStrlen(value);
  1133. value = xmlDictLookup(ctxt->dict, value, len);
  1134. defaults->values[5 * defaults->nbAttrs + 2] = value;
  1135. defaults->values[5 * defaults->nbAttrs + 3] = value + len;
  1136. if (ctxt->external)
  1137. defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
  1138. else
  1139. defaults->values[5 * defaults->nbAttrs + 4] = NULL;
  1140. defaults->nbAttrs++;
  1141. return;
  1142. mem_error:
  1143. xmlErrMemory(ctxt, NULL);
  1144. return;
  1145. }
  1146. /**
  1147. * xmlAddSpecialAttr:
  1148. * @ctxt: an XML parser context
  1149. * @fullname: the element fullname
  1150. * @fullattr: the attribute fullname
  1151. * @type: the attribute type
  1152. *
  1153. * Register this attribute type
  1154. */
  1155. static void
  1156. xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
  1157. const xmlChar *fullname,
  1158. const xmlChar *fullattr,
  1159. int type)
  1160. {
  1161. if (ctxt->attsSpecial == NULL) {
  1162. ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
  1163. if (ctxt->attsSpecial == NULL)
  1164. goto mem_error;
  1165. }
  1166. if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
  1167. return;
  1168. xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
  1169. (void *) (long) type);
  1170. return;
  1171. mem_error:
  1172. xmlErrMemory(ctxt, NULL);
  1173. return;
  1174. }
  1175. /**
  1176. * xmlCleanSpecialAttrCallback:
  1177. *
  1178. * Removes CDATA attributes from the special attribute table
  1179. */
  1180. static void
  1181. xmlCleanSpecialAttrCallback(void *payload, void *data,
  1182. const xmlChar *fullname, const xmlChar *fullattr,
  1183. const xmlChar *unused ATTRIBUTE_UNUSED) {
  1184. xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
  1185. if (((long) payload) == XML_ATTRIBUTE_CDATA) {
  1186. xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
  1187. }
  1188. }
  1189. /**
  1190. * xmlCleanSpecialAttr:
  1191. * @ctxt: an XML parser context
  1192. *
  1193. * Trim the list of attributes defined to remove all those of type
  1194. * CDATA as they are not special. This call should be done when finishing
  1195. * to parse the DTD and before starting to parse the document root.
  1196. */
  1197. static void
  1198. xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
  1199. {
  1200. if (ctxt->attsSpecial == NULL)
  1201. return;
  1202. xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
  1203. if (xmlHashSize(ctxt->attsSpecial) == 0) {
  1204. xmlHashFree(ctxt->attsSpecial, NULL);
  1205. ctxt->attsSpecial = NULL;
  1206. }
  1207. return;
  1208. }
  1209. /**
  1210. * xmlCheckLanguageID:
  1211. * @lang: pointer to the string value
  1212. *
  1213. * Checks that the value conforms to the LanguageID production:
  1214. *
  1215. * NOTE: this is somewhat deprecated, those productions were removed from
  1216. * the XML Second edition.
  1217. *
  1218. * [33] LanguageID ::= Langcode ('-' Subcode)*
  1219. * [34] Langcode ::= ISO639Code | IanaCode | UserCode
  1220. * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
  1221. * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
  1222. * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
  1223. * [38] Subcode ::= ([a-z] | [A-Z])+
  1224. *
  1225. * Returns 1 if correct 0 otherwise
  1226. **/
  1227. int
  1228. xmlCheckLanguageID(const xmlChar * lang)
  1229. {
  1230. const xmlChar *cur = lang;
  1231. if (cur == NULL)
  1232. return (0);
  1233. if (((cur[0] == 'i') && (cur[1] == '-')) ||
  1234. ((cur[0] == 'I') && (cur[1] == '-'))) {
  1235. /*
  1236. * IANA code
  1237. */
  1238. cur += 2;
  1239. while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
  1240. ((cur[0] >= 'a') && (cur[0] <= 'z')))
  1241. cur++;
  1242. } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
  1243. ((cur[0] == 'X') && (cur[1] == '-'))) {
  1244. /*
  1245. * User code
  1246. */
  1247. cur += 2;
  1248. while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
  1249. ((cur[0] >= 'a') && (cur[0] <= 'z')))
  1250. cur++;
  1251. } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
  1252. ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
  1253. /*
  1254. * ISO639
  1255. */
  1256. cur++;
  1257. if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
  1258. ((cur[0] >= 'a') && (cur[0] <= 'z')))
  1259. cur++;
  1260. else
  1261. return (0);
  1262. } else
  1263. return (0);
  1264. while (cur[0] != 0) { /* non input consuming */
  1265. if (cur[0] != '-')
  1266. return (0);
  1267. cur++;
  1268. if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
  1269. ((cur[0] >= 'a') && (cur[0] <= 'z')))
  1270. cur++;
  1271. else
  1272. return (0);
  1273. while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
  1274. ((cur[0] >= 'a') && (cur[0] <= 'z')))
  1275. cur++;
  1276. }
  1277. return (1);
  1278. }
  1279. /************************************************************************
  1280. * *
  1281. * Parser stacks related functions and macros *
  1282. * *
  1283. ************************************************************************/
  1284. static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
  1285. const xmlChar ** str);
  1286. #ifdef SAX2
  1287. /**
  1288. * nsPush:
  1289. * @ctxt: an XML parser context
  1290. * @prefix: the namespace prefix or NULL
  1291. * @URL: the namespace name
  1292. *
  1293. * Pushes a new parser namespace on top of the ns stack
  1294. *
  1295. * Returns -1 in case of error, -2 if the namespace should be discarded
  1296. * and the index in the stack otherwise.
  1297. */
  1298. static int
  1299. nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
  1300. {
  1301. if (ctxt->options & XML_PARSE_NSCLEAN) {
  1302. int i;
  1303. for (i = 0;i < ctxt->nsNr;i += 2) {
  1304. if (ctxt->nsTab[i] == prefix) {
  1305. /* in scope */
  1306. if (ctxt->nsTab[i + 1] == URL)
  1307. return(-2);
  1308. /* out of scope keep it */
  1309. break;
  1310. }
  1311. }
  1312. }
  1313. if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
  1314. ctxt->nsMax = 10;
  1315. ctxt->nsNr = 0;
  1316. ctxt->nsTab = (const xmlChar **)
  1317. xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
  1318. if (ctxt->nsTab == NULL) {
  1319. xmlErrMemory(ctxt, NULL);
  1320. ctxt->nsMax = 0;
  1321. return (-1);
  1322. }
  1323. } else if (ctxt->nsNr >= ctxt->nsMax) {
  1324. const xmlChar ** tmp;
  1325. ctxt->nsMax *= 2;
  1326. tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
  1327. ctxt->nsMax * sizeof(ctxt->nsTab[0]));
  1328. if (tmp == NULL) {
  1329. xmlErrMemory(ctxt, NULL);
  1330. ctxt->nsMax /= 2;
  1331. return (-1);
  1332. }
  1333. ctxt->nsTab = tmp;
  1334. }
  1335. ctxt->nsTab[ctxt->nsNr++] = prefix;
  1336. ctxt->nsTab[ctxt->nsNr++] = URL;
  1337. return (ctxt->nsNr);
  1338. }
  1339. /**
  1340. * nsPop:
  1341. * @ctxt: an XML parser context
  1342. * @nr: the number to pop
  1343. *
  1344. * Pops the top @nr parser prefix/namespace from the ns stack
  1345. *
  1346. * Returns the number of namespaces removed
  1347. */
  1348. static int
  1349. nsPop(xmlParserCtxtPtr ctxt, int nr)
  1350. {
  1351. int i;
  1352. if (ctxt->nsTab == NULL) return(0);
  1353. if (ctxt->nsNr < nr) {
  1354. xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
  1355. nr = ctxt->nsNr;
  1356. }
  1357. if (ctxt->nsNr <= 0)
  1358. return (0);
  1359. for (i = 0;i < nr;i++) {
  1360. ctxt->nsNr--;
  1361. ctxt->nsTab[ctxt->nsNr] = NULL;
  1362. }
  1363. return(nr);
  1364. }
  1365. #endif
  1366. static int
  1367. xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
  1368. const xmlChar **atts;
  1369. int *attallocs;
  1370. int maxatts;
  1371. if (ctxt->atts == NULL) {
  1372. maxatts = 55; /* allow for 10 attrs by default */
  1373. atts = (const xmlChar **)
  1374. xmlMalloc(maxatts * sizeof(xmlChar *));
  1375. if (atts == NULL) goto mem_error;
  1376. ctxt->atts = atts;
  1377. attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
  1378. if (attallocs == NULL) goto mem_error;
  1379. ctxt->attallocs = attallocs;
  1380. ctxt->maxatts = maxatts;
  1381. } else if (nr + 5 > ctxt->maxatts) {
  1382. maxatts = (nr + 5) * 2;
  1383. atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
  1384. maxatts * sizeof(const xmlChar *));
  1385. if (atts == NULL) goto mem_error;
  1386. ctxt->atts = atts;
  1387. attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
  1388. (maxatts / 5) * sizeof(int));
  1389. if (attallocs == NULL) goto mem_error;
  1390. ctxt->attallocs = attallocs;
  1391. ctxt->maxatts = maxatts;
  1392. }
  1393. return(ctxt->maxatts);
  1394. mem_error:
  1395. xmlErrMemory(ctxt, NULL);
  1396. return(-1);
  1397. }
  1398. /**
  1399. * inputPush:
  1400. * @ctxt: an XML parser context
  1401. * @value: the parser input
  1402. *
  1403. * Pushes a new parser input on top of the input stack
  1404. *
  1405. * Returns -1 in case of error, the index in the stack otherwise
  1406. */
  1407. int
  1408. inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
  1409. {
  1410. if ((ctxt == NULL) || (value == NULL))
  1411. return(-1);
  1412. if (ctxt->inputNr >= ctxt->inputMax) {
  1413. ctxt->inputMax *= 2;
  1414. ctxt->inputTab =
  1415. (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
  1416. ctxt->inputMax *
  1417. sizeof(ctxt->inputTab[0]));
  1418. if (ctxt->inputTab == NULL) {
  1419. xmlErrMemory(ctxt, NULL);
  1420. xmlFreeInputStream(value);
  1421. ctxt->inputMax /= 2;
  1422. value = NULL;
  1423. return (-1);
  1424. }
  1425. }
  1426. ctxt->inputTab[ctxt->inputNr] = value;
  1427. ctxt->input = value;
  1428. return (ctxt->inputNr++);
  1429. }
  1430. /**
  1431. * inputPop:
  1432. * @ctxt: an XML parser context
  1433. *
  1434. * Pops the top parser input from the input stack
  1435. *
  1436. * Returns the input just removed
  1437. */
  1438. xmlParserInputPtr
  1439. inputPop(xmlParserCtxtPtr ctxt)
  1440. {
  1441. xmlParserInputPtr ret;
  1442. if (ctxt == NULL)
  1443. return(NULL);
  1444. if (ctxt->inputNr <= 0)
  1445. return (NULL);
  1446. ctxt->inputNr--;
  1447. if (ctxt->inputNr > 0)
  1448. ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
  1449. else
  1450. ctxt->input = NULL;
  1451. ret = ctxt->inputTab[ctxt->inputNr];
  1452. ctxt->inputTab[ctxt->inputNr] = NULL;
  1453. return (ret);
  1454. }
  1455. /**
  1456. * nodePush:
  1457. * @ctxt: an XML parser context
  1458. * @value: the element node
  1459. *
  1460. * Pushes a new element node on top of the node stack
  1461. *
  1462. * Returns -1 in case of error, the index in the stack otherwise
  1463. */
  1464. int
  1465. nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
  1466. {
  1467. if (ctxt == NULL) return(0);
  1468. if (ctxt->nodeNr >= ctxt->nodeMax) {
  1469. xmlNodePtr *tmp;
  1470. tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
  1471. ctxt->nodeMax * 2 *
  1472. sizeof(ctxt->nodeTab[0]));
  1473. if (tmp == NULL) {
  1474. xmlErrMemory(ctxt, NULL);
  1475. return (-1);
  1476. }
  1477. ctxt->nodeTab = tmp;
  1478. ctxt->nodeMax *= 2;
  1479. }
  1480. if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
  1481. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  1482. xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
  1483. "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
  1484. xmlParserMaxDepth);
  1485. ctxt->instate = XML_PARSER_EOF;
  1486. return(-1);
  1487. }
  1488. ctxt->nodeTab[ctxt->nodeNr] = value;
  1489. ctxt->node = value;
  1490. return (ctxt->nodeNr++);
  1491. }
  1492. /**
  1493. * nodePop:
  1494. * @ctxt: an XML parser context
  1495. *
  1496. * Pops the top element node from the node stack
  1497. *
  1498. * Returns the node just removed
  1499. */
  1500. xmlNodePtr
  1501. nodePop(xmlParserCtxtPtr ctxt)
  1502. {
  1503. xmlNodePtr ret;
  1504. if (ctxt == NULL) return(NULL);
  1505. if (ctxt->nodeNr <= 0)
  1506. return (NULL);
  1507. ctxt->nodeNr--;
  1508. if (ctxt->nodeNr > 0)
  1509. ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
  1510. else
  1511. ctxt->node = NULL;
  1512. ret = ctxt->nodeTab[ctxt->nodeNr];
  1513. ctxt->nodeTab[ctxt->nodeNr] = NULL;
  1514. return (ret);
  1515. }
  1516. #ifdef LIBXML_PUSH_ENABLED
  1517. /**
  1518. * nameNsPush:
  1519. * @ctxt: an XML parser context
  1520. * @value: the element name
  1521. * @prefix: the element prefix
  1522. * @URI: the element namespace name
  1523. *
  1524. * Pushes a new element name/prefix/URL on top of the name stack
  1525. *
  1526. * Returns -1 in case of error, the index in the stack otherwise
  1527. */
  1528. static int
  1529. nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
  1530. const xmlChar *prefix, const xmlChar *URI, int nsNr)
  1531. {
  1532. if (ctxt->nameNr >= ctxt->nameMax) {
  1533. const xmlChar * *tmp;
  1534. void **tmp2;
  1535. ctxt->nameMax *= 2;
  1536. tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
  1537. ctxt->nameMax *
  1538. sizeof(ctxt->nameTab[0]));
  1539. if (tmp == NULL) {
  1540. ctxt->nameMax /= 2;
  1541. goto mem_error;
  1542. }
  1543. ctxt->nameTab = tmp;
  1544. tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
  1545. ctxt->nameMax * 3 *
  1546. sizeof(ctxt->pushTab[0]));
  1547. if (tmp2 == NULL) {
  1548. ctxt->nameMax /= 2;
  1549. goto mem_error;
  1550. }
  1551. ctxt->pushTab = tmp2;
  1552. }
  1553. ctxt->nameTab[ctxt->nameNr] = value;
  1554. ctxt->name = value;
  1555. ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
  1556. ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
  1557. ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
  1558. return (ctxt->nameNr++);
  1559. mem_error:
  1560. xmlErrMemory(ctxt, NULL);
  1561. return (-1);
  1562. }
  1563. /**
  1564. * nameNsPop:
  1565. * @ctxt: an XML parser context
  1566. *
  1567. * Pops the top element/prefix/URI name from the name stack
  1568. *
  1569. * Returns the name just removed
  1570. */
  1571. static const xmlChar *
  1572. nameNsPop(xmlParserCtxtPtr ctxt)
  1573. {
  1574. const xmlChar *ret;
  1575. if (ctxt->nameNr <= 0)
  1576. return (NULL);
  1577. ctxt->nameNr--;
  1578. if (ctxt->nameNr > 0)
  1579. ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
  1580. else
  1581. ctxt->name = NULL;
  1582. ret = ctxt->nameTab[ctxt->nameNr];
  1583. ctxt->nameTab[ctxt->nameNr] = NULL;
  1584. return (ret);
  1585. }
  1586. #endif /* LIBXML_PUSH_ENABLED */
  1587. /**
  1588. * namePush:
  1589. * @ctxt: an XML parser context
  1590. * @value: the element name
  1591. *
  1592. * Pushes a new element name on top of the name stack
  1593. *
  1594. * Returns -1 in case of error, the index in the stack otherwise
  1595. */
  1596. int
  1597. namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
  1598. {
  1599. if (ctxt == NULL) return (-1);
  1600. if (ctxt->nameNr >= ctxt->nameMax) {
  1601. const xmlChar * *tmp;
  1602. ctxt->nameMax *= 2;
  1603. tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
  1604. ctxt->nameMax *
  1605. sizeof(ctxt->nameTab[0]));
  1606. if (tmp == NULL) {
  1607. ctxt->nameMax /= 2;
  1608. goto mem_error;
  1609. }
  1610. ctxt->nameTab = tmp;
  1611. }
  1612. ctxt->nameTab[ctxt->nameNr] = value;
  1613. ctxt->name = value;
  1614. return (ctxt->nameNr++);
  1615. mem_error:
  1616. xmlErrMemory(ctxt, NULL);
  1617. return (-1);
  1618. }
  1619. /**
  1620. * namePop:
  1621. * @ctxt: an XML parser context
  1622. *
  1623. * Pops the top element name from the name stack
  1624. *
  1625. * Returns the name just removed
  1626. */
  1627. const xmlChar *
  1628. namePop(xmlParserCtxtPtr ctxt)
  1629. {
  1630. const xmlChar *ret;
  1631. if ((ctxt == NULL) || (ctxt->nameNr <= 0))
  1632. return (NULL);
  1633. ctxt->nameNr--;
  1634. if (ctxt->nameNr > 0)
  1635. ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
  1636. else
  1637. ctxt->name = NULL;
  1638. ret = ctxt->nameTab[ctxt->nameNr];
  1639. ctxt->nameTab[ctxt->nameNr] = NULL;
  1640. return (ret);
  1641. }
  1642. static int spacePush(xmlParserCtxtPtr ctxt, int val) {
  1643. if (ctxt->spaceNr >= ctxt->spaceMax) {
  1644. int *tmp;
  1645. ctxt->spaceMax *= 2;
  1646. tmp = (int *) xmlRealloc(ctxt->spaceTab,
  1647. ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
  1648. if (tmp == NULL) {
  1649. xmlErrMemory(ctxt, NULL);
  1650. ctxt->spaceMax /=2;
  1651. return(-1);
  1652. }
  1653. ctxt->spaceTab = tmp;
  1654. }
  1655. ctxt->spaceTab[ctxt->spaceNr] = val;
  1656. ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
  1657. return(ctxt->spaceNr++);
  1658. }
  1659. static int spacePop(xmlParserCtxtPtr ctxt) {
  1660. int ret;
  1661. if (ctxt->spaceNr <= 0) return(0);
  1662. ctxt->spaceNr--;
  1663. if (ctxt->spaceNr > 0)
  1664. ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
  1665. else
  1666. ctxt->space = &ctxt->spaceTab[0];
  1667. ret = ctxt->spaceTab[ctxt->spaceNr];
  1668. ctxt->spaceTab[ctxt->spaceNr] = -1;
  1669. return(ret);
  1670. }
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
  1705. #define RAW (*ctxt->input->cur)
  1706. #define CUR (*ctxt->input->cur)
  1707. #define NXT(val) ctxt->input->cur[(val)]
  1708. #define CUR_PTR ctxt->input->cur
  1709. #define CMP4( s, c1, c2, c3, c4 ) \
  1710. ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
  1711. ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
  1712. #define CMP5( s, c1, c2, c3, c4, c5 ) \
  1713. ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
  1714. #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  1715. ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
  1716. #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  1717. ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
  1718. #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  1719. ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
  1720. #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  1721. ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
  1722. ((unsigned char *) s)[ 8 ] == c9 )
  1723. #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  1724. ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
  1725. ((unsigned char *) s)[ 9 ] == c10 )
  1726. #define SKIP(val) do { \
  1727. ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
  1728. if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  1729. if ((*ctxt->input->cur == 0) && \
  1730. (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
  1731. xmlPopInput(ctxt); \
  1732. } while (0)
  1733. #define SKIPL(val) do { \
  1734. int skipl; \
  1735. for(skipl=0; skipl<val; skipl++) { \
  1736. if (*(ctxt->input->cur) == '\n') { \
  1737. ctxt->input->line++; ctxt->input->col = 1; \
  1738. } else ctxt->input->col++; \
  1739. ctxt->nbChars++; \
  1740. ctxt->input->cur++; \
  1741. } \
  1742. if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  1743. if ((*ctxt->input->cur == 0) && \
  1744. (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
  1745. xmlPopInput(ctxt); \
  1746. } while (0)
  1747. #define SHRINK if ((ctxt->progressive == 0) && \
  1748. (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
  1749. (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
  1750. xmlSHRINK (ctxt);
  1751. static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
  1752. xmlParserInputShrink(ctxt->input);
  1753. if ((*ctxt->input->cur == 0) &&
  1754. (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
  1755. xmlPopInput(ctxt);
  1756. }
  1757. #define GROW if ((ctxt->progressive == 0) && \
  1758. (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
  1759. xmlGROW (ctxt);
  1760. static void xmlGROW (xmlParserCtxtPtr ctxt) {
  1761. xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  1762. if ((*ctxt->input->cur == 0) &&
  1763. (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
  1764. xmlPopInput(ctxt);
  1765. }
  1766. #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
  1767. #define NEXT xmlNextChar(ctxt)
  1768. #define NEXT1 { \
  1769. ctxt->input->col++; \
  1770. ctxt->input->cur++; \
  1771. ctxt->nbChars++; \
  1772. if (*ctxt->input->cur == 0) \
  1773. xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  1774. }
  1775. #define NEXTL(l) do { \
  1776. if (*(ctxt->input->cur) == '\n') { \
  1777. ctxt->input->line++; ctxt->input->col = 1; \
  1778. } else ctxt->input->col++; \
  1779. ctxt->input->cur += l; \
  1780. if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  1781. } while (0)
  1782. #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
  1783. #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
  1784. #define COPY_BUF(l,b,i,v) \
  1785. if (l == 1) b[i++] = (xmlChar) v; \
  1786. else i += xmlCopyCharMultiByte(&b[i],v)
  1787. /**
  1788. * xmlSkipBlankChars:
  1789. * @ctxt: the XML parser context
  1790. *
  1791. * skip all blanks character found at that point in the input streams.
  1792. * It pops up finished entities in the process if allowable at that point.
  1793. *
  1794. * Returns the number of space chars skipped
  1795. */
  1796. int
  1797. xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
  1798. int res = 0;
  1799. /*
  1800. * It's Okay to use CUR/NEXT here since all the blanks are on
  1801. * the ASCII range.
  1802. */
  1803. if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
  1804. const xmlChar *cur;
  1805. /*
  1806. * if we are in the document content, go really fast
  1807. */
  1808. cur = ctxt->input->cur;
  1809. while (IS_BLANK_CH(*cur)) {
  1810. if (*cur == '\n') {
  1811. ctxt->input->line++; ctxt->input->col = 1;
  1812. }
  1813. cur++;
  1814. res++;
  1815. if (*cur == 0) {
  1816. ctxt->input->cur = cur;
  1817. xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
  1818. cur = ctxt->input->cur;
  1819. }
  1820. }
  1821. ctxt->input->cur = cur;
  1822. } else {
  1823. int cur;
  1824. do {
  1825. cur = CUR;
  1826. while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
  1827. NEXT;
  1828. cur = CUR;
  1829. res++;
  1830. }
  1831. while ((cur == 0) && (ctxt->inputNr > 1) &&
  1832. (ctxt->instate != XML_PARSER_COMMENT)) {
  1833. xmlPopInput(ctxt);
  1834. cur = CUR;
  1835. }
  1836. /*
  1837. * Need to handle support of entities branching here
  1838. */
  1839. if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
  1840. } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
  1841. }
  1842. return(res);
  1843. }
  1844. /************************************************************************
  1845. * *
  1846. * Commodity functions to handle entities *
  1847. * *
  1848. ************************************************************************/
  1849. /**
  1850. * xmlPopInput:
  1851. * @ctxt: an XML parser context
  1852. *
  1853. * xmlPopInput: the current input pointed by ctxt->input came to an end
  1854. * pop it and return the next char.
  1855. *
  1856. * Returns the current xmlChar in the parser context
  1857. */
  1858. xmlChar
  1859. xmlPopInput(xmlParserCtxtPtr ctxt) {
  1860. if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
  1861. if (xmlParserDebugEntities)
  1862. xmlGenericError(xmlGenericErrorContext,
  1863. "Popping input %d\n", ctxt->inputNr);
  1864. xmlFreeInputStream(inputPop(ctxt));
  1865. if ((*ctxt->input->cur == 0) &&
  1866. (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
  1867. return(xmlPopInput(ctxt));
  1868. return(CUR);
  1869. }
  1870. /**
  1871. * xmlPushInput:
  1872. * @ctxt: an XML parser context
  1873. * @input: an XML parser input fragment (entity, XML fragment ...).
  1874. *
  1875. * xmlPushInput: switch to a new input stream which is stacked on top
  1876. * of the previous one(s).
  1877. * Returns -1 in case of error or the index in the input stack
  1878. */
  1879. int
  1880. xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
  1881. int ret;
  1882. if (input == NULL) return(-1);
  1883. if (xmlParserDebugEntities) {
  1884. if ((ctxt->input != NULL) && (ctxt->input->filename))
  1885. xmlGenericError(xmlGenericErrorContext,
  1886. "%s(%d): ", ctxt->input->filename,
  1887. ctxt->input->line);
  1888. xmlGenericError(xmlGenericErrorContext,
  1889. "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
  1890. }
  1891. ret = inputPush(ctxt, input);
  1892. GROW;
  1893. return(ret);
  1894. }
  1895. /**
  1896. * xmlParseCharRef:
  1897. * @ctxt: an XML parser context
  1898. *
  1899. * parse Reference declarations
  1900. *
  1901. * [66] CharRef ::= '&#' [0-9]+ ';' |
  1902. * '&#x' [0-9a-fA-F]+ ';'
  1903. *
  1904. * [ WFC: Legal Character ]
  1905. * Characters referred to using character references must match the
  1906. * production for Char.
  1907. *
  1908. * Returns the value parsed (as an int), 0 in case of error
  1909. */
  1910. int
  1911. xmlParseCharRef(xmlParserCtxtPtr ctxt) {
  1912. unsigned int val = 0;
  1913. int count = 0;
  1914. unsigned int outofrange = 0;
  1915. /*
  1916. * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
  1917. */
  1918. if ((RAW == '&') && (NXT(1) == '#') &&
  1919. (NXT(2) == 'x')) {
  1920. SKIP(3);
  1921. GROW;
  1922. while (RAW != ';') { /* loop blocked by count */
  1923. if (count++ > 20) {
  1924. count = 0;
  1925. GROW;
  1926. }
  1927. if ((RAW >= '0') && (RAW <= '9'))
  1928. val = val * 16 + (CUR - '0');
  1929. else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
  1930. val = val * 16 + (CUR - 'a') + 10;
  1931. else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
  1932. val = val * 16 + (CUR - 'A') + 10;
  1933. else {
  1934. xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
  1935. val = 0;
  1936. break;
  1937. }
  1938. if (val > 0x10FFFF)
  1939. outofrange = val;
  1940. NEXT;
  1941. count++;
  1942. }
  1943. if (RAW == ';') {
  1944. /* on purpose to avoid reentrancy problems with NEXT and SKIP */
  1945. ctxt->input->col++;
  1946. ctxt->nbChars ++;
  1947. ctxt->input->cur++;
  1948. }
  1949. } else if ((RAW == '&') && (NXT(1) == '#')) {
  1950. SKIP(2);
  1951. GROW;
  1952. while (RAW != ';') { /* loop blocked by count */
  1953. if (count++ > 20) {
  1954. count = 0;
  1955. GROW;
  1956. }
  1957. if ((RAW >= '0') && (RAW <= '9'))
  1958. val = val * 10 + (CUR - '0');
  1959. else {
  1960. xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
  1961. val = 0;
  1962. break;
  1963. }
  1964. if (val > 0x10FFFF)
  1965. outofrange = val;
  1966. NEXT;
  1967. count++;
  1968. }
  1969. if (RAW == ';') {
  1970. /* on purpose to avoid reentrancy problems with NEXT and SKIP */
  1971. ctxt->input->col++;
  1972. ctxt->nbChars ++;
  1973. ctxt->input->cur++;
  1974. }
  1975. } else {
  1976. xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
  1977. }
  1978. /*
  1979. * [ WFC: Legal Character ]
  1980. * Characters referred to using character references must match the
  1981. * production for Char.
  1982. */
  1983. if ((IS_CHAR(val) && (outofrange == 0))) {
  1984. return(val);
  1985. } else {
  1986. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  1987. "xmlParseCharRef: invalid xmlChar value %d\n",
  1988. val);
  1989. }
  1990. return(0);
  1991. }
  1992. /**
  1993. * xmlParseStringCharRef:
  1994. * @ctxt: an XML parser context
  1995. * @str: a pointer to an index in the string
  1996. *
  1997. * parse Reference declarations, variant parsing from a string rather
  1998. * than an an input flow.
  1999. *
  2000. * [66] CharRef ::= '&#' [0-9]+ ';' |
  2001. * '&#x' [0-9a-fA-F]+ ';'
  2002. *
  2003. * [ WFC: Legal Character ]
  2004. * Characters referred to using character references must match the
  2005. * production for Char.
  2006. *
  2007. * Returns the value parsed (as an int), 0 in case of error, str will be
  2008. * updated to the current value of the index
  2009. */
  2010. static int
  2011. xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
  2012. const xmlChar *ptr;
  2013. xmlChar cur;
  2014. unsigned int val = 0;
  2015. unsigned int outofrange = 0;
  2016. if ((str == NULL) || (*str == NULL)) return(0);
  2017. ptr = *str;
  2018. cur = *ptr;
  2019. if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
  2020. ptr += 3;
  2021. cur = *ptr;
  2022. while (cur != ';') { /* Non input consuming loop */
  2023. if ((cur >= '0') && (cur <= '9'))
  2024. val = val * 16 + (cur - '0');
  2025. else if ((cur >= 'a') && (cur <= 'f'))
  2026. val = val * 16 + (cur - 'a') + 10;
  2027. else if ((cur >= 'A') && (cur <= 'F'))
  2028. val = val * 16 + (cur - 'A') + 10;
  2029. else {
  2030. xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
  2031. val = 0;
  2032. break;
  2033. }
  2034. if (val > 0x10FFFF)
  2035. outofrange = val;
  2036. ptr++;
  2037. cur = *ptr;
  2038. }
  2039. if (cur == ';')
  2040. ptr++;
  2041. } else if ((cur == '&') && (ptr[1] == '#')){
  2042. ptr += 2;
  2043. cur = *ptr;
  2044. while (cur != ';') { /* Non input consuming loops */
  2045. if ((cur >= '0') && (cur <= '9'))
  2046. val = val * 10 + (cur - '0');
  2047. else {
  2048. xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
  2049. val = 0;
  2050. break;
  2051. }
  2052. if (val > 0x10FFFF)
  2053. outofrange = val;
  2054. ptr++;
  2055. cur = *ptr;
  2056. }
  2057. if (cur == ';')
  2058. ptr++;
  2059. } else {
  2060. xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
  2061. return(0);
  2062. }
  2063. *str = ptr;
  2064. /*
  2065. * [ WFC: Legal Character ]
  2066. * Characters referred to using character references must match the
  2067. * production for Char.
  2068. */
  2069. if ((IS_CHAR(val) && (outofrange == 0))) {
  2070. return(val);
  2071. } else {
  2072. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  2073. "xmlParseStringCharRef: invalid xmlChar value %d\n",
  2074. val);
  2075. }
  2076. return(0);
  2077. }
  2078. /**
  2079. * xmlNewBlanksWrapperInputStream:
  2080. * @ctxt: an XML parser context
  2081. * @entity: an Entity pointer
  2082. *
  2083. * Create a new input stream for wrapping
  2084. * blanks around a PEReference
  2085. *
  2086. * Returns the new input stream or NULL
  2087. */
  2088. static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
  2089. static xmlParserInputPtr
  2090. xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
  2091. xmlParserInputPtr input;
  2092. xmlChar *buffer;
  2093. size_t length;
  2094. if (entity == NULL) {
  2095. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  2096. "xmlNewBlanksWrapperInputStream entity\n");
  2097. return(NULL);
  2098. }
  2099. if (xmlParserDebugEntities)
  2100. xmlGenericError(xmlGenericErrorContext,
  2101. "new blanks wrapper for entity: %s\n", entity->name);
  2102. input = xmlNewInputStream(ctxt);
  2103. if (input == NULL) {
  2104. return(NULL);
  2105. }
  2106. length = xmlStrlen(entity->name) + 5;
  2107. buffer = xmlMallocAtomic(length);
  2108. if (buffer == NULL) {
  2109. xmlErrMemory(ctxt, NULL);
  2110. xmlFree(input);
  2111. return(NULL);
  2112. }
  2113. buffer [0] = ' ';
  2114. buffer [1] = '%';
  2115. buffer [length-3] = ';';
  2116. buffer [length-2] = ' ';
  2117. buffer [length-1] = 0;
  2118. memcpy(buffer + 2, entity->name, length - 5);
  2119. input->free = deallocblankswrapper;
  2120. input->base = buffer;
  2121. input->cur = buffer;
  2122. input->length = length;
  2123. input->end = &buffer[length];
  2124. return(input);
  2125. }
  2126. /**
  2127. * xmlParserHandlePEReference:
  2128. * @ctxt: the parser context
  2129. *
  2130. * [69] PEReference ::= '%' Name ';'
  2131. *
  2132. * [ WFC: No Recursion ]
  2133. * A parsed entity must not contain a recursive
  2134. * reference to itself, either directly or indirectly.
  2135. *
  2136. * [ WFC: Entity Declared ]
  2137. * In a document without any DTD, a document with only an internal DTD
  2138. * subset which contains no parameter entity references, or a document
  2139. * with "standalone='yes'", ... ... The declaration of a parameter
  2140. * entity must precede any reference to it...
  2141. *
  2142. * [ VC: Entity Declared ]
  2143. * In a document with an external subset or external parameter entities
  2144. * with "standalone='no'", ... ... The declaration of a parameter entity
  2145. * must precede any reference to it...
  2146. *
  2147. * [ WFC: In DTD ]
  2148. * Parameter-entity references may only appear in the DTD.
  2149. * NOTE: misleading but this is handled.
  2150. *
  2151. * A PEReference may have been detected in the current input stream
  2152. * the handling is done accordingly to
  2153. * http://www.w3.org/TR/REC-xml#entproc
  2154. * i.e.
  2155. * - Included in literal in entity values
  2156. * - Included as Parameter Entity reference within DTDs
  2157. */
  2158. void
  2159. xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
  2160. const xmlChar *name;
  2161. xmlEntityPtr entity = NULL;
  2162. xmlParserInputPtr input;
  2163. if (RAW != '%') return;
  2164. switch(ctxt->instate) {
  2165. case XML_PARSER_CDATA_SECTION:
  2166. return;
  2167. case XML_PARSER_COMMENT:
  2168. return;
  2169. case XML_PARSER_START_TAG:
  2170. return;
  2171. case XML_PARSER_END_TAG:
  2172. return;
  2173. case XML_PARSER_EOF:
  2174. xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
  2175. return;
  2176. case XML_PARSER_PROLOG:
  2177. case XML_PARSER_START:
  2178. case XML_PARSER_MISC:
  2179. xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
  2180. return;
  2181. case XML_PARSER_ENTITY_DECL:
  2182. case XML_PARSER_CONTENT:
  2183. case XML_PARSER_ATTRIBUTE_VALUE:
  2184. case XML_PARSER_PI:
  2185. case XML_PARSER_SYSTEM_LITERAL:
  2186. case XML_PARSER_PUBLIC_LITERAL:
  2187. /* we just ignore it there */
  2188. return;
  2189. case XML_PARSER_EPILOG:
  2190. xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
  2191. return;
  2192. case XML_PARSER_ENTITY_VALUE:
  2193. /*
  2194. * NOTE: in the case of entity values, we don't do the
  2195. * substitution here since we need the literal
  2196. * entity value to be able to save the internal
  2197. * subset of the document.
  2198. * This will be handled by xmlStringDecodeEntities
  2199. */
  2200. return;
  2201. case XML_PARSER_DTD:
  2202. /*
  2203. * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
  2204. * In the internal DTD subset, parameter-entity references
  2205. * can occur only where markup declarations can occur, not
  2206. * within markup declarations.
  2207. * In that case this is handled in xmlParseMarkupDecl
  2208. */
  2209. if ((ctxt->external == 0) && (ctxt->inputNr == 1))
  2210. return;
  2211. if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
  2212. return;
  2213. break;
  2214. case XML_PARSER_IGNORE:
  2215. return;
  2216. }
  2217. NEXT;
  2218. name = xmlParseName(ctxt);
  2219. if (xmlParserDebugEntities)
  2220. xmlGenericError(xmlGenericErrorContext,
  2221. "PEReference: %s\n", name);
  2222. if (name == NULL) {
  2223. xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
  2224. } else {
  2225. if (RAW == ';') {
  2226. NEXT;
  2227. if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
  2228. entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
  2229. if (entity == NULL) {
  2230. /*
  2231. * [ WFC: Entity Declared ]
  2232. * In a document without any DTD, a document with only an
  2233. * internal DTD subset which contains no parameter entity
  2234. * references, or a document with "standalone='yes'", ...
  2235. * ... The declaration of a parameter entity must precede
  2236. * any reference to it...
  2237. */
  2238. if ((ctxt->standalone == 1) ||
  2239. ((ctxt->hasExternalSubset == 0) &&
  2240. (ctxt->hasPErefs == 0))) {
  2241. xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
  2242. "PEReference: %%%s; not found\n", name);
  2243. } else {
  2244. /*
  2245. * [ VC: Entity Declared ]
  2246. * In a document with an external subset or external
  2247. * parameter entities with "standalone='no'", ...
  2248. * ... The declaration of a parameter entity must precede
  2249. * any reference to it...
  2250. */
  2251. if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
  2252. xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
  2253. "PEReference: %%%s; not found\n",
  2254. name, NULL);
  2255. } else
  2256. xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
  2257. "PEReference: %%%s; not found\n",
  2258. name, NULL);
  2259. ctxt->valid = 0;
  2260. }
  2261. } else if (ctxt->input->free != deallocblankswrapper) {
  2262. input = xmlNewBlanksWrapperInputStream(ctxt, entity);
  2263. if (xmlPushInput(ctxt, input) < 0)
  2264. return;
  2265. } else {
  2266. if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
  2267. (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
  2268. xmlChar start[4];
  2269. xmlCharEncoding enc;
  2270. /*
  2271. * handle the extra spaces added before and after
  2272. * c.f. http://www.w3.org/TR/REC-xml#as-PE
  2273. * this is done independently.
  2274. */
  2275. input = xmlNewEntityInputStream(ctxt, entity);
  2276. if (xmlPushInput(ctxt, input) < 0)
  2277. return;
  2278. /*
  2279. * Get the 4 first bytes and decode the charset
  2280. * if enc != XML_CHAR_ENCODING_NONE
  2281. * plug some encoding conversion routines.
  2282. * Note that, since we may have some non-UTF8
  2283. * encoding (like UTF16, bug 135229), the 'length'
  2284. * is not known, but we can calculate based upon
  2285. * the amount of data in the buffer.
  2286. */
  2287. GROW
  2288. if ((ctxt->input->end - ctxt->input->cur)>=4) {
  2289. start[0] = RAW;
  2290. start[1] = NXT(1);
  2291. start[2] = NXT(2);
  2292. start[3] = NXT(3);
  2293. enc = xmlDetectCharEncoding(start, 4);
  2294. if (enc != XML_CHAR_ENCODING_NONE) {
  2295. xmlSwitchEncoding(ctxt, enc);
  2296. }
  2297. }
  2298. if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
  2299. (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
  2300. (IS_BLANK_CH(NXT(5)))) {
  2301. xmlParseTextDecl(ctxt);
  2302. }
  2303. } else {
  2304. xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
  2305. "PEReference: %s is not a parameter entity\n",
  2306. name);
  2307. }
  2308. }
  2309. } else {
  2310. xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
  2311. }
  2312. }
  2313. }
  /*
   * growBuffer:
   * @buffer:  name of an xmlChar * variable; an integer variable named
   *           <buffer>_size must exist next to it
   * @n:  extra room added on top of the doubling
   *
   * Doubles <buffer>_size, adds @n and reallocates @buffer accordingly.
   * On allocation failure control jumps to a label named mem_error, which
   * the enclosing function must provide; @buffer is left untouched then,
   * but <buffer>_size has already been updated.
   */
  #define growBuffer(buffer, n) {                                         \
      xmlChar *tmp;                                                       \
                                                                          \
      buffer##_size *= 2;                                                 \
      buffer##_size += (n);                                               \
      tmp = (xmlChar *) xmlRealloc(buffer,                                \
                                   buffer##_size * sizeof(xmlChar));      \
      if (tmp == NULL)                                                    \
          goto mem_error;                                                 \
      buffer = tmp;                                                       \
  }
  2326. /**
  2327. * xmlStringLenDecodeEntities:
  2328. * @ctxt: the parser context
  2329. * @str: the input string
  2330. * @len: the string length
  2331. * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
  2332. * @end: an end marker xmlChar, 0 if none
  2333. * @end2: an end marker xmlChar, 0 if none
  2334. * @end3: an end marker xmlChar, 0 if none
  2335. *
  2336. * Takes a entity string content and process to do the adequate substitutions.
  2337. *
  2338. * [67] Reference ::= EntityRef | CharRef
  2339. *
  2340. * [69] PEReference ::= '%' Name ';'
  2341. *
  2342. * Returns A newly allocated string with the substitution done. The caller
  2343. * must deallocate it !
  2344. */
  2345. xmlChar *
  2346. xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
  2347. int what, xmlChar end, xmlChar end2, xmlChar end3) {
  2348. xmlChar *buffer = NULL;
  2349. int buffer_size = 0;
  2350. xmlChar *current = NULL;
  2351. xmlChar *rep = NULL;
  2352. const xmlChar *last;
  2353. xmlEntityPtr ent;
  2354. int c,l;
  2355. int nbchars = 0;
  2356. if ((ctxt == NULL) || (str == NULL) || (len < 0))
  2357. return(NULL);
  2358. last = str + len;
  2359. if (((ctxt->depth > 40) &&
  2360. ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
  2361. (ctxt->depth > 1024)) {
  2362. xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
  2363. return(NULL);
  2364. }
  2365. /*
  2366. * allocate a translation buffer.
  2367. */
  2368. buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
  2369. buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
  2370. if (buffer == NULL) goto mem_error;
  2371. /*
  2372. * OK loop until we reach one of the ending char or a size limit.
  2373. * we are operating on already parsed values.
  2374. */
  2375. if (str < last)
  2376. c = CUR_SCHAR(str, l);
  2377. else
  2378. c = 0;
  2379. while ((c != 0) && (c != end) && /* non input consuming loop */
  2380. (c != end2) && (c != end3)) {
  2381. if (c == 0) break;
  2382. if ((c == '&') && (str[1] == '#')) {
  2383. int val = xmlParseStringCharRef(ctxt, &str);
  2384. if (val != 0) {
  2385. COPY_BUF(0,buffer,nbchars,val);
  2386. }
  2387. if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
  2388. growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
  2389. }
  2390. } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
  2391. if (xmlParserDebugEntities)
  2392. xmlGenericError(xmlGenericErrorContext,
  2393. "String decoding Entity Reference: %.30s\n",
  2394. str);
  2395. ent = xmlParseStringEntityRef(ctxt, &str);
  2396. if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
  2397. (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
  2398. goto int_error;
  2399. if (ent != NULL)
  2400. ctxt->nbentities += ent->checked;
  2401. if ((ent != NULL) &&
  2402. (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
  2403. if (ent->content != NULL) {
  2404. COPY_BUF(0,buffer,nbchars,ent->content[0]);
  2405. if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
  2406. growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
  2407. }
  2408. } else {
  2409. xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
  2410. "predefined entity has no content\n");
  2411. }
  2412. } else if ((ent != NULL) && (ent->content != NULL)) {
  2413. ctxt->depth++;
  2414. rep = xmlStringDecodeEntities(ctxt, ent->content, what,
  2415. 0, 0, 0);
  2416. ctxt->depth--;
  2417. if (rep != NULL) {
  2418. current = rep;
  2419. while (*current != 0) { /* non input consuming loop */
  2420. buffer[nbchars++] = *current++;
  2421. if (nbchars >
  2422. buffer_size - XML_PARSER_BUFFER_SIZE) {
  2423. if (xmlParserEntityCheck(ctxt, nbchars, ent))
  2424. goto int_error;
  2425. growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
  2426. }
  2427. }
  2428. xmlFree(rep);
  2429. rep = NULL;
  2430. }
  2431. } else if (ent != NULL) {
  2432. int i = xmlStrlen(ent->name);
  2433. const xmlChar *cur = ent->name;
  2434. buffer[nbchars++] = '&';
  2435. if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
  2436. growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
  2437. }
  2438. for (;i > 0;i--)
  2439. buffer[nbchars++] = *cur++;
  2440. buffer[nbchars++] = ';';
  2441. }
  2442. } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
  2443. if (xmlParserDebugEntities)
  2444. xmlGenericError(xmlGenericErrorContext,
  2445. "String decoding PE Reference: %.30s\n", str);
  2446. ent = xmlParseStringPEReference(ctxt, &str);
  2447. if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
  2448. goto int_error;
  2449. if (ent != NULL)
  2450. ctxt->nbentities += ent->checked;
  2451. if (ent != NULL) {
  2452. if (ent->content == NULL) {
  2453. xmlLoadEntityContent(ctxt, ent);
  2454. }
  2455. ctxt->depth++;
  2456. rep = xmlStringDecodeEntities(ctxt, ent->content, what,
  2457. 0, 0, 0);
  2458. ctxt->depth--;
  2459. if (rep != NULL) {
  2460. current = rep;
  2461. while (*current != 0) { /* non input consuming loop */
  2462. buffer[nbchars++] = *current++;
  2463. if (nbchars >
  2464. buffer_size - XML_PARSER_BUFFER_SIZE) {
  2465. if (xmlParserEntityCheck(ctxt, nbchars, ent))
  2466. goto int_error;
  2467. growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
  2468. }
  2469. }
  2470. xmlFree(rep);
  2471. rep = NULL;
  2472. }
  2473. }
  2474. } else {
  2475. COPY_BUF(l,buffer,nbchars,c);
  2476. str += l;
  2477. if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
  2478. growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
  2479. }
  2480. }
  2481. if (str < last)
  2482. c = CUR_SCHAR(str, l);
  2483. else
  2484. c = 0;
  2485. }
  2486. buffer[nbchars] = 0;
  2487. return(buffer);
  2488. mem_error:
  2489. xmlErrMemory(ctxt, NULL);
  2490. int_error:
  2491. if (rep != NULL)
  2492. xmlFree(rep);
  2493. if (buffer != NULL)
  2494. xmlFree(buffer);
  2495. return(NULL);
  2496. }
  2497. /**
  2498. * xmlStringDecodeEntities:
  2499. * @ctxt: the parser context
  2500. * @str: the input string
  2501. * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
  2502. * @end: an end marker xmlChar, 0 if none
  2503. * @end2: an end marker xmlChar, 0 if none
  2504. * @end3: an end marker xmlChar, 0 if none
  2505. *
  2506. * Takes a entity string content and process to do the adequate substitutions.
  2507. *
  2508. * [67] Reference ::= EntityRef | CharRef
  2509. *
  2510. * [69] PEReference ::= '%' Name ';'
  2511. *
  2512. * Returns A newly allocated string with the substitution done. The caller
  2513. * must deallocate it !
  2514. */
  2515. xmlChar *
  2516. xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
  2517. xmlChar end, xmlChar end2, xmlChar end3) {
  2518. if ((ctxt == NULL) || (str == NULL)) return(NULL);
  2519. return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
  2520. end, end2, end3));
  2521. }
  2522. /************************************************************************
  2523. * *
  2524. * Commodity functions, cleanup needed ? *
  2525. * *
  2526. ************************************************************************/
  2527. /**
  2528. * areBlanks:
  2529. * @ctxt: an XML parser context
  2530. * @str: a xmlChar *
  2531. * @len: the size of @str
  2532. * @blank_chars: we know the chars are blanks
  2533. *
  2534. * Is this a sequence of blank chars that one can ignore ?
  2535. *
  2536. * Returns 1 if ignorable 0 otherwise.
  2537. */
  2538. static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
  2539. int blank_chars) {
  2540. int i, ret;
  2541. xmlNodePtr lastChild;
  2542. /*
  2543. * Don't spend time trying to differentiate them, the same callback is
  2544. * used !
  2545. */
  2546. if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
  2547. return(0);
  2548. /*
  2549. * Check for xml:space value.
  2550. */
  2551. if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
  2552. (*(ctxt->space) == -2))
  2553. return(0);
  2554. /*
  2555. * Check that the string is made of blanks
  2556. */
  2557. if (blank_chars == 0) {
  2558. for (i = 0;i < len;i++)
  2559. if (!(IS_BLANK_CH(str[i]))) return(0);
  2560. }
  2561. /*
  2562. * Look if the element is mixed content in the DTD if available
  2563. */
  2564. if (ctxt->node == NULL) return(0);
  2565. if (ctxt->myDoc != NULL) {
  2566. ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
  2567. if (ret == 0) return(1);
  2568. if (ret == 1) return(0);
  2569. }
  2570. /*
  2571. * Otherwise, heuristic :-\
  2572. */
  2573. if ((RAW != '<') && (RAW != 0xD)) return(0);
  2574. if ((ctxt->node->children == NULL) &&
  2575. (RAW == '<') && (NXT(1) == '/')) return(0);
  2576. lastChild = xmlGetLastChild(ctxt->node);
  2577. if (lastChild == NULL) {
  2578. if ((ctxt->node->type != XML_ELEMENT_NODE) &&
  2579. (ctxt->node->content != NULL)) return(0);
  2580. } else if (xmlNodeIsText(lastChild))
  2581. return(0);
  2582. else if ((ctxt->node->children != NULL) &&
  2583. (xmlNodeIsText(ctxt->node->children)))
  2584. return(0);
  2585. return(1);
  2586. }
  2587. /************************************************************************
  2588. * *
  2589. * Extra stuff for namespace support *
  2590. * Relates to http://www.w3.org/TR/WD-xml-names *
  2591. * *
  2592. ************************************************************************/
  2593. /**
  2594. * xmlSplitQName:
  2595. * @ctxt: an XML parser context
  2596. * @name: an XML parser context
  2597. * @prefix: a xmlChar **
  2598. *
  2599. * parse an UTF8 encoded XML qualified name string
  2600. *
  2601. * [NS 5] QName ::= (Prefix ':')? LocalPart
  2602. *
  2603. * [NS 6] Prefix ::= NCName
  2604. *
  2605. * [NS 7] LocalPart ::= NCName
  2606. *
  2607. * Returns the local part, and prefix is updated
  2608. * to get the Prefix if any.
  2609. */
  2610. xmlChar *
  2611. xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
  2612. xmlChar buf[XML_MAX_NAMELEN + 5];
  2613. xmlChar *buffer = NULL;
  2614. int len = 0;
  2615. int max = XML_MAX_NAMELEN;
  2616. xmlChar *ret = NULL;
  2617. const xmlChar *cur = name;
  2618. int c;
  2619. if (prefix == NULL) return(NULL);
  2620. *prefix = NULL;
  2621. if (cur == NULL) return(NULL);
  2622. #ifndef XML_XML_NAMESPACE
  2623. /* xml: prefix is not really a namespace */
  2624. if ((cur[0] == 'x') && (cur[1] == 'm') &&
  2625. (cur[2] == 'l') && (cur[3] == ':'))
  2626. return(xmlStrdup(name));
  2627. #endif
  2628. /* nasty but well=formed */
  2629. if (cur[0] == ':')
  2630. return(xmlStrdup(name));
  2631. c = *cur++;
  2632. while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
  2633. buf[len++] = c;
  2634. c = *cur++;
  2635. }
  2636. if (len >= max) {
  2637. /*
  2638. * Okay someone managed to make a huge name, so he's ready to pay
  2639. * for the processing speed.
  2640. */
  2641. max = len * 2;
  2642. buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
  2643. if (buffer == NULL) {
  2644. xmlErrMemory(ctxt, NULL);
  2645. return(NULL);
  2646. }
  2647. memcpy(buffer, buf, len);
  2648. while ((c != 0) && (c != ':')) { /* tested bigname.xml */
  2649. if (len + 10 > max) {
  2650. xmlChar *tmp;
  2651. max *= 2;
  2652. tmp = (xmlChar *) xmlRealloc(buffer,
  2653. max * sizeof(xmlChar));
  2654. if (tmp == NULL) {
  2655. xmlFree(buffer);
  2656. xmlErrMemory(ctxt, NULL);
  2657. return(NULL);
  2658. }
  2659. buffer = tmp;
  2660. }
  2661. buffer[len++] = c;
  2662. c = *cur++;
  2663. }
  2664. buffer[len] = 0;
  2665. }
  2666. if ((c == ':') && (*cur == 0)) {
  2667. if (buffer != NULL)
  2668. xmlFree(buffer);
  2669. *prefix = NULL;
  2670. return(xmlStrdup(name));
  2671. }
  2672. if (buffer == NULL)
  2673. ret = xmlStrndup(buf, len);
  2674. else {
  2675. ret = buffer;
  2676. buffer = NULL;
  2677. max = XML_MAX_NAMELEN;
  2678. }
  2679. if (c == ':') {
  2680. c = *cur;
  2681. *prefix = ret;
  2682. if (c == 0) {
  2683. return(xmlStrndup(BAD_CAST "", 0));
  2684. }
  2685. len = 0;
  2686. /*
  2687. * Check that the first character is proper to start
  2688. * a new name
  2689. */
  2690. if (!(((c >= 0x61) && (c <= 0x7A)) ||
  2691. ((c >= 0x41) && (c <= 0x5A)) ||
  2692. (c == '_') || (c == ':'))) {
  2693. int l;
  2694. int first = CUR_SCHAR(cur, l);
  2695. if (!IS_LETTER(first) && (first != '_')) {
  2696. xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
  2697. "Name %s is not XML Namespace compliant\n",
  2698. name);
  2699. }
  2700. }
  2701. cur++;
  2702. while ((c != 0) && (len < max)) { /* tested bigname2.xml */
  2703. buf[len++] = c;
  2704. c = *cur++;
  2705. }
  2706. if (len >= max) {
  2707. /*
  2708. * Okay someone managed to make a huge name, so he's ready to pay
  2709. * for the processing speed.
  2710. */
  2711. max = len * 2;
  2712. buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
  2713. if (buffer == NULL) {
  2714. xmlErrMemory(ctxt, NULL);
  2715. return(NULL);
  2716. }
  2717. memcpy(buffer, buf, len);
  2718. while (c != 0) { /* tested bigname2.xml */
  2719. if (len + 10 > max) {
  2720. xmlChar *tmp;
  2721. max *= 2;
  2722. tmp = (xmlChar *) xmlRealloc(buffer,
  2723. max * sizeof(xmlChar));
  2724. if (tmp == NULL) {
  2725. xmlErrMemory(ctxt, NULL);
  2726. xmlFree(buffer);
  2727. return(NULL);
  2728. }
  2729. buffer = tmp;
  2730. }
  2731. buffer[len++] = c;
  2732. c = *cur++;
  2733. }
  2734. buffer[len] = 0;
  2735. }
  2736. if (buffer == NULL)
  2737. ret = xmlStrndup(buf, len);
  2738. else {
  2739. ret = buffer;
  2740. }
  2741. }
  2742. return(ret);
  2743. }
  2744. /************************************************************************
  2745. * *
  2746. * The parser itself *
  2747. * Relates to http://www.w3.org/TR/REC-xml *
  2748. * *
  2749. ************************************************************************/
  2750. /************************************************************************
  2751. * *
  2752. * Routines to parse Name, NCName and NmToken *
  2753. * *
  2754. ************************************************************************/
  #ifdef DEBUG
  /*
   * Call counters, only compiled into DEBUG builds. Objects with static
   * storage duration start at zero, so no explicit initializer is needed.
   * nbParseNameComplex is bumped by xmlParseNameComplex(); the others are
   * presumably maintained by the routines they are named after.
   */
  static unsigned long nbParseName;
  static unsigned long nbParseNmToken;
  static unsigned long nbParseNCName;
  static unsigned long nbParseNCNameComplex;
  static unsigned long nbParseNameComplex;
  static unsigned long nbParseStringName;
  #endif
  2763. /*
  2764. * The two following functions are related to the change of accepted
  2765. * characters for Name and NmToken in the Revision 5 of XML-1.0
  2766. * They correspond to the modified production [4] and the new production [4a]
  2767. * changes in that revision. Also note that the macros used for the
  2768. * productions Letter, Digit, CombiningChar and Extender are not needed
  2769. * anymore.
  2770. * We still keep compatibility to pre-revision5 parsing semantic if the
  2771. * new XML_PARSE_OLD10 option is given to the parser.
  2772. */
  2773. static int
  2774. xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
  2775. if ((ctxt->options & XML_PARSE_OLD10) == 0) {
  2776. /*
  2777. * Use the new checks of production [4] [4a] amd [5] of the
  2778. * Update 5 of XML-1.0
  2779. */
  2780. if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
  2781. (((c >= 'a') && (c <= 'z')) ||
  2782. ((c >= 'A') && (c <= 'Z')) ||
  2783. (c == '_') || (c == ':') ||
  2784. ((c >= 0xC0) && (c <= 0xD6)) ||
  2785. ((c >= 0xD8) && (c <= 0xF6)) ||
  2786. ((c >= 0xF8) && (c <= 0x2FF)) ||
  2787. ((c >= 0x370) && (c <= 0x37D)) ||
  2788. ((c >= 0x37F) && (c <= 0x1FFF)) ||
  2789. ((c >= 0x200C) && (c <= 0x200D)) ||
  2790. ((c >= 0x2070) && (c <= 0x218F)) ||
  2791. ((c >= 0x2C00) && (c <= 0x2FEF)) ||
  2792. ((c >= 0x3001) && (c <= 0xD7FF)) ||
  2793. ((c >= 0xF900) && (c <= 0xFDCF)) ||
  2794. ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
  2795. ((c >= 0x10000) && (c <= 0xEFFFF))))
  2796. return(1);
  2797. } else {
  2798. if (IS_LETTER(c) || (c == '_') || (c == ':'))
  2799. return(1);
  2800. }
  2801. return(0);
  2802. }
  2803. static int
  2804. xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
  2805. if ((ctxt->options & XML_PARSE_OLD10) == 0) {
  2806. /*
  2807. * Use the new checks of production [4] [4a] amd [5] of the
  2808. * Update 5 of XML-1.0
  2809. */
  2810. if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
  2811. (((c >= 'a') && (c <= 'z')) ||
  2812. ((c >= 'A') && (c <= 'Z')) ||
  2813. ((c >= '0') && (c <= '9')) || /* !start */
  2814. (c == '_') || (c == ':') ||
  2815. (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
  2816. ((c >= 0xC0) && (c <= 0xD6)) ||
  2817. ((c >= 0xD8) && (c <= 0xF6)) ||
  2818. ((c >= 0xF8) && (c <= 0x2FF)) ||
  2819. ((c >= 0x300) && (c <= 0x36F)) || /* !start */
  2820. ((c >= 0x370) && (c <= 0x37D)) ||
  2821. ((c >= 0x37F) && (c <= 0x1FFF)) ||
  2822. ((c >= 0x200C) && (c <= 0x200D)) ||
  2823. ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
  2824. ((c >= 0x2070) && (c <= 0x218F)) ||
  2825. ((c >= 0x2C00) && (c <= 0x2FEF)) ||
  2826. ((c >= 0x3001) && (c <= 0xD7FF)) ||
  2827. ((c >= 0xF900) && (c <= 0xFDCF)) ||
  2828. ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
  2829. ((c >= 0x10000) && (c <= 0xEFFFF))))
  2830. return(1);
  2831. } else {
  2832. if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
  2833. (c == '.') || (c == '-') ||
  2834. (c == '_') || (c == ':') ||
  2835. (IS_COMBINING(c)) ||
  2836. (IS_EXTENDER(c)))
  2837. return(1);
  2838. }
  2839. return(0);
  2840. }
  2841. static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
  2842. int *len, int *alloc, int normalize);
  2843. static const xmlChar *
  2844. xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
  2845. int len = 0, l;
  2846. int c;
  2847. int count = 0;
  2848. #ifdef DEBUG
  2849. nbParseNameComplex++;
  2850. #endif
  2851. /*
  2852. * Handler for more complex cases
  2853. */
  2854. GROW;
  2855. c = CUR_CHAR(l);
  2856. if ((ctxt->options & XML_PARSE_OLD10) == 0) {
  2857. /*
  2858. * Use the new checks of production [4] [4a] amd [5] of the
  2859. * Update 5 of XML-1.0
  2860. */
  2861. if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
  2862. (!(((c >= 'a') && (c <= 'z')) ||
  2863. ((c >= 'A') && (c <= 'Z')) ||
  2864. (c == '_') || (c == ':') ||
  2865. ((c >= 0xC0) && (c <= 0xD6)) ||
  2866. ((c >= 0xD8) && (c <= 0xF6)) ||
  2867. ((c >= 0xF8) && (c <= 0x2FF)) ||
  2868. ((c >= 0x370) && (c <= 0x37D)) ||
  2869. ((c >= 0x37F) && (c <= 0x1FFF)) ||
  2870. ((c >= 0x200C) && (c <= 0x200D)) ||
  2871. ((c >= 0x2070) && (c <= 0x218F)) ||
  2872. ((c >= 0x2C00) && (c <= 0x2FEF)) ||
  2873. ((c >= 0x3001) && (c <= 0xD7FF)) ||
  2874. ((c >= 0xF900) && (c <= 0xFDCF)) ||
  2875. ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
  2876. ((c >= 0x10000) && (c <= 0xEFFFF))))) {
  2877. return(NULL);
  2878. }
  2879. len += l;
  2880. NEXTL(l);
  2881. c = CUR_CHAR(l);
  2882. while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
  2883. (((c >= 'a') && (c <= 'z')) ||
  2884. ((c >= 'A') && (c <= 'Z')) ||
  2885. ((c >= '0') && (c <= '9')) || /* !start */
  2886. (c == '_') || (c == ':') ||
  2887. (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
  2888. ((c >= 0xC0) && (c <= 0xD6)) ||
  2889. ((c >= 0xD8) && (c <= 0xF6)) ||
  2890. ((c >= 0xF8) && (c <= 0x2FF)) ||
  2891. ((c >= 0x300) && (c <= 0x36F)) || /* !start */
  2892. ((c >= 0x370) && (c <= 0x37D)) ||
  2893. ((c >= 0x37F) && (c <= 0x1FFF)) ||
  2894. ((c >= 0x200C) && (c <= 0x200D)) ||
  2895. ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
  2896. ((c >= 0x2070) && (c <= 0x218F)) ||
  2897. ((c >= 0x2C00) && (c <= 0x2FEF)) ||
  2898. ((c >= 0x3001) && (c <= 0xD7FF)) ||
  2899. ((c >= 0xF900) && (c <= 0xFDCF)) ||
  2900. ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
  2901. ((c >= 0x10000) && (c <= 0xEFFFF))
  2902. )) {
  2903. if (count++ > 100) {
  2904. count = 0;
  2905. GROW;
  2906. }
  2907. len += l;
  2908. NEXTL(l);
  2909. c = CUR_CHAR(l);
  2910. }
  2911. } else {
  2912. if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
  2913. (!IS_LETTER(c) && (c != '_') &&
  2914. (c != ':'))) {
  2915. return(NULL);
  2916. }
  2917. len += l;
  2918. NEXTL(l);
  2919. c = CUR_CHAR(l);
  2920. while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
  2921. ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
  2922. (c == '.') || (c == '-') ||
  2923. (c == '_') || (c == ':') ||
  2924. (IS_COMBINING(c)) ||
  2925. (IS_EXTENDER(c)))) {
  2926. if (count++ > 100) {
  2927. count = 0;
  2928. GROW;
  2929. }
  2930. len += l;
  2931. NEXTL(l);
  2932. c = CUR_CHAR(l);
  2933. }
  2934. }
  2935. if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
  2936. return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
  2937. return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
  2938. }
  2939. /**
  2940. * xmlParseName:
  2941. * @ctxt: an XML parser context
  2942. *
  2943. * parse an XML name.
  2944. *
  2945. * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
  2946. * CombiningChar | Extender
  2947. *
  2948. * [5] Name ::= (Letter | '_' | ':') (NameChar)*
  2949. *
  2950. * [6] Names ::= Name (#x20 Name)*
  2951. *
  2952. * Returns the Name parsed or NULL
  2953. */
  2954. const xmlChar *
  2955. xmlParseName(xmlParserCtxtPtr ctxt) {
  2956. const xmlChar *in;
  2957. const xmlChar *ret;
  2958. int count = 0;
  2959. GROW;
  2960. #ifdef DEBUG
  2961. nbParseName++;
  2962. #endif
  2963. /*
  2964. * Accelerator for simple ASCII names
  2965. */
  2966. in = ctxt->input->cur;
  2967. if (((*in >= 0x61) && (*in <= 0x7A)) ||
  2968. ((*in >= 0x41) && (*in <= 0x5A)) ||
  2969. (*in == '_') || (*in == ':')) {
  2970. in++;
  2971. while (((*in >= 0x61) && (*in <= 0x7A)) ||
  2972. ((*in >= 0x41) && (*in <= 0x5A)) ||
  2973. ((*in >= 0x30) && (*in <= 0x39)) ||
  2974. (*in == '_') || (*in == '-') ||
  2975. (*in == ':') || (*in == '.'))
  2976. in++;
  2977. if ((*in > 0) && (*in < 0x80)) {
  2978. count = in - ctxt->input->cur;
  2979. ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
  2980. ctxt->input->cur = in;
  2981. ctxt->nbChars += count;
  2982. ctxt->input->col += count;
  2983. if (ret == NULL)
  2984. xmlErrMemory(ctxt, NULL);
  2985. return(ret);
  2986. }
  2987. }
  2988. /* accelerator for special cases */
  2989. return(xmlParseNameComplex(ctxt));
  2990. }
  2991. static const xmlChar *
  2992. xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
  2993. int len = 0, l;
  2994. int c;
  2995. int count = 0;
  2996. #ifdef DEBUG
  2997. nbParseNCNameComplex++;
  2998. #endif
  2999. /*
  3000. * Handler for more complex cases
  3001. */
  3002. GROW;
  3003. c = CUR_CHAR(l);
  3004. if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
  3005. (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
  3006. return(NULL);
  3007. }
  3008. while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
  3009. (xmlIsNameChar(ctxt, c) && (c != ':'))) {
  3010. if (count++ > 100) {
  3011. count = 0;
  3012. GROW;
  3013. }
  3014. len += l;
  3015. NEXTL(l);
  3016. c = CUR_CHAR(l);
  3017. }
  3018. return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
  3019. }
  3020. /**
  3021. * xmlParseNCName:
  3022. * @ctxt: an XML parser context
  3023. * @len: lenght of the string parsed
  3024. *
  3025. * parse an XML name.
  3026. *
  3027. * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
  3028. * CombiningChar | Extender
  3029. *
  3030. * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
  3031. *
  3032. * Returns the Name parsed or NULL
  3033. */
  3034. static const xmlChar *
  3035. xmlParseNCName(xmlParserCtxtPtr ctxt) {
  3036. const xmlChar *in;
  3037. const xmlChar *ret;
  3038. int count = 0;
  3039. #ifdef DEBUG
  3040. nbParseNCName++;
  3041. #endif
  3042. /*
  3043. * Accelerator for simple ASCII names
  3044. */
  3045. in = ctxt->input->cur;
  3046. if (((*in >= 0x61) && (*in <= 0x7A)) ||
  3047. ((*in >= 0x41) && (*in <= 0x5A)) ||
  3048. (*in == '_')) {
  3049. in++;
  3050. while (((*in >= 0x61) && (*in <= 0x7A)) ||
  3051. ((*in >= 0x41) && (*in <= 0x5A)) ||
  3052. ((*in >= 0x30) && (*in <= 0x39)) ||
  3053. (*in == '_') || (*in == '-') ||
  3054. (*in == '.'))
  3055. in++;
  3056. if ((*in > 0) && (*in < 0x80)) {
  3057. count = in - ctxt->input->cur;
  3058. ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
  3059. ctxt->input->cur = in;
  3060. ctxt->nbChars += count;
  3061. ctxt->input->col += count;
  3062. if (ret == NULL) {
  3063. xmlErrMemory(ctxt, NULL);
  3064. }
  3065. return(ret);
  3066. }
  3067. }
  3068. return(xmlParseNCNameComplex(ctxt));
  3069. }
  3070. /**
  3071. * xmlParseNameAndCompare:
  3072. * @ctxt: an XML parser context
  3073. *
  3074. * parse an XML name and compares for match
  3075. * (specialized for endtag parsing)
  3076. *
  3077. * Returns NULL for an illegal name, (xmlChar*) 1 for success
  3078. * and the name for mismatch
  3079. */
  3080. static const xmlChar *
  3081. xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
  3082. register const xmlChar *cmp = other;
  3083. register const xmlChar *in;
  3084. const xmlChar *ret;
  3085. GROW;
  3086. in = ctxt->input->cur;
  3087. while (*in != 0 && *in == *cmp) {
  3088. ++in;
  3089. ++cmp;
  3090. ctxt->input->col++;
  3091. }
  3092. if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
  3093. /* success */
  3094. ctxt->input->cur = in;
  3095. return (const xmlChar*) 1;
  3096. }
  3097. /* failure (or end of input buffer), check with full function */
  3098. ret = xmlParseName (ctxt);
  3099. /* strings coming from the dictionnary direct compare possible */
  3100. if (ret == other) {
  3101. return (const xmlChar*) 1;
  3102. }
  3103. return ret;
  3104. }
  3105. /**
  3106. * xmlParseStringName:
  3107. * @ctxt: an XML parser context
  3108. * @str: a pointer to the string pointer (IN/OUT)
  3109. *
  3110. * parse an XML name.
  3111. *
  3112. * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
  3113. * CombiningChar | Extender
  3114. *
  3115. * [5] Name ::= (Letter | '_' | ':') (NameChar)*
  3116. *
  3117. * [6] Names ::= Name (#x20 Name)*
  3118. *
  3119. * Returns the Name parsed or NULL. The @str pointer
  3120. * is updated to the current location in the string.
  3121. */
  3122. static xmlChar *
  3123. xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
  3124. xmlChar buf[XML_MAX_NAMELEN + 5];
  3125. const xmlChar *cur = *str;
  3126. int len = 0, l;
  3127. int c;
  3128. #ifdef DEBUG
  3129. nbParseStringName++;
  3130. #endif
  3131. c = CUR_SCHAR(cur, l);
  3132. if (!xmlIsNameStartChar(ctxt, c)) {
  3133. return(NULL);
  3134. }
  3135. COPY_BUF(l,buf,len,c);
  3136. cur += l;
  3137. c = CUR_SCHAR(cur, l);
  3138. while (xmlIsNameChar(ctxt, c)) {
  3139. COPY_BUF(l,buf,len,c);
  3140. cur += l;
  3141. c = CUR_SCHAR(cur, l);
  3142. if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
  3143. /*
  3144. * Okay someone managed to make a huge name, so he's ready to pay
  3145. * for the processing speed.
  3146. */
  3147. xmlChar *buffer;
  3148. int max = len * 2;
  3149. buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
  3150. if (buffer == NULL) {
  3151. xmlErrMemory(ctxt, NULL);
  3152. return(NULL);
  3153. }
  3154. memcpy(buffer, buf, len);
  3155. while (xmlIsNameChar(ctxt, c)) {
  3156. if (len + 10 > max) {
  3157. xmlChar *tmp;
  3158. max *= 2;
  3159. tmp = (xmlChar *) xmlRealloc(buffer,
  3160. max * sizeof(xmlChar));
  3161. if (tmp == NULL) {
  3162. xmlErrMemory(ctxt, NULL);
  3163. xmlFree(buffer);
  3164. return(NULL);
  3165. }
  3166. buffer = tmp;
  3167. }
  3168. COPY_BUF(l,buffer,len,c);
  3169. cur += l;
  3170. c = CUR_SCHAR(cur, l);
  3171. }
  3172. buffer[len] = 0;
  3173. *str = cur;
  3174. return(buffer);
  3175. }
  3176. }
  3177. *str = cur;
  3178. return(xmlStrndup(buf, len));
  3179. }
  3180. /**
  3181. * xmlParseNmtoken:
  3182. * @ctxt: an XML parser context
  3183. *
  3184. * parse an XML Nmtoken.
  3185. *
  3186. * [7] Nmtoken ::= (NameChar)+
  3187. *
  3188. * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
  3189. *
  3190. * Returns the Nmtoken parsed or NULL
  3191. */
  3192. xmlChar *
  3193. xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
  3194. xmlChar buf[XML_MAX_NAMELEN + 5];
  3195. int len = 0, l;
  3196. int c;
  3197. int count = 0;
  3198. #ifdef DEBUG
  3199. nbParseNmToken++;
  3200. #endif
  3201. GROW;
  3202. c = CUR_CHAR(l);
  3203. while (xmlIsNameChar(ctxt, c)) {
  3204. if (count++ > 100) {
  3205. count = 0;
  3206. GROW;
  3207. }
  3208. COPY_BUF(l,buf,len,c);
  3209. NEXTL(l);
  3210. c = CUR_CHAR(l);
  3211. if (len >= XML_MAX_NAMELEN) {
  3212. /*
  3213. * Okay someone managed to make a huge token, so he's ready to pay
  3214. * for the processing speed.
  3215. */
  3216. xmlChar *buffer;
  3217. int max = len * 2;
  3218. buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
  3219. if (buffer == NULL) {
  3220. xmlErrMemory(ctxt, NULL);
  3221. return(NULL);
  3222. }
  3223. memcpy(buffer, buf, len);
  3224. while (xmlIsNameChar(ctxt, c)) {
  3225. if (count++ > 100) {
  3226. count = 0;
  3227. GROW;
  3228. }
  3229. if (len + 10 > max) {
  3230. xmlChar *tmp;
  3231. max *= 2;
  3232. tmp = (xmlChar *) xmlRealloc(buffer,
  3233. max * sizeof(xmlChar));
  3234. if (tmp == NULL) {
  3235. xmlErrMemory(ctxt, NULL);
  3236. xmlFree(buffer);
  3237. return(NULL);
  3238. }
  3239. buffer = tmp;
  3240. }
  3241. COPY_BUF(l,buffer,len,c);
  3242. NEXTL(l);
  3243. c = CUR_CHAR(l);
  3244. }
  3245. buffer[len] = 0;
  3246. return(buffer);
  3247. }
  3248. }
  3249. if (len == 0)
  3250. return(NULL);
  3251. return(xmlStrndup(buf, len));
  3252. }
/**
 * xmlParseEntityValue:
 * @ctxt:  an XML parser context
 * @orig:  if non-NULL, receives the buffer holding the literal as read
 *         (before PE substitution); only written when the closing quote
 *         was found. If NULL that buffer is freed here.
 *
 * parse a value for ENTITY declarations
 *
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
 *                     "'" ([^%&'] | PEReference | Reference)* "'"
 *
 * The function sets ctxt->instate to XML_PARSER_ENTITY_VALUE and does not
 * restore the previous state itself.
 *
 * Returns the EntityValue parsed with reference substituted or NULL
 * (no opening quote, allocation failure, or closing quote not found)
 */
xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int c, l;
    xmlChar stop;
    xmlChar *ret = NULL;
    const xmlChar *cur = NULL;
    xmlParserInputPtr input;

    /* The opening quote decides which quote terminates the literal. */
    if (RAW == '"') stop = '"';
    else if (RAW == '\'') stop = '\'';
    else {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /*
     * The content of the entity definition is copied in a buffer.
     */

    ctxt->instate = XML_PARSER_ENTITY_VALUE;
    /* Remember the input we started in, see the NOTE below. */
    input = ctxt->input;
    GROW;
    NEXT;   /* skip the opening quote */
    c = CUR_CHAR(l);
    /*
     * NOTE: 4.4.5 Included in Literal
     * When a parameter entity reference appears in a literal entity
     * value, ... a single or double quote character in the replacement
     * text is always treated as a normal data character and will not
     * terminate the literal.
     * In practice it means we stop the loop only when back at parsing
     * the initial entity and the quote is found
     */
    while ((IS_CHAR(c)) && ((c != stop) || /* checked */
           (ctxt->input != input))) {
        /* Keep at least 5 spare bytes before copying one character. */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlErrMemory(ctxt, NULL);
                xmlFree(buf);
                return(NULL);
            }
            buf = tmp;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
            xmlPopInput(ctxt);

        GROW;
        c = CUR_CHAR(l);
        /* A zero character may only mean "buffer exhausted": retry once. */
        if (c == 0) {
            GROW;
            c = CUR_CHAR(l);
        }
    }
    buf[len] = 0;

    /*
     * Raise problem w.r.t. '&' and '%' being used in non-entities
     * reference constructs. Note Charref will be handled in
     * xmlStringDecodeEntities()
     */
    cur = buf;
    while (*cur != 0) { /* non input consuming */
        if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
            xmlChar *name;
            xmlChar tmp = *cur;     /* '%' or '&', kept for the messages */

            cur++;
            /* A well-formed reference is a name followed by ';'. */
            name = xmlParseStringName(ctxt, &cur);
            if ((name == NULL) || (*cur != ';')) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
            "EntityValue: '%c' forbidden except for entities references\n",
                                  tmp);
            }
            /*
             * A '%' reference found while ctxt->inSubset is 1 and only one
             * input is on the stack is reported as
             * XML_ERR_ENTITY_PE_INTERNAL.
             */
            if ((tmp == '%') && (ctxt->inSubset == 1) &&
                (ctxt->inputNr == 1)) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
            }
            if (name != NULL)
                xmlFree(name);
            /* Do not step past the terminating zero below. */
            if (*cur == 0)
                break;
        }
        cur++;
    }

    /*
     * Then PEReference entities are substituted, provided the literal was
     * properly terminated by its quote.
     */
    if (c != stop) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
        xmlFree(buf);
    } else {
        NEXT;   /* skip the closing quote */
        /*
         * NOTE: 4.4.7 Bypassed
         * When a general entity reference appears in the EntityValue in
         * an entity declaration, it is bypassed and left as is.
         * so XML_SUBSTITUTE_REF is not set here.
         */
        ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
                                      0, 0, 0);
        /* Hand the raw buffer over to the caller, or release it. */
        if (orig != NULL)
            *orig = buf;
        else
            xmlFree(buf);
    }

    return(ret);
}
  3382. /**
  3383. * xmlParseAttValueComplex:
  3384. * @ctxt: an XML parser context
  3385. * @len: the resulting attribute len
  3386. * @normalize: wether to apply the inner normalization
  3387. *
  3388. * parse a value for an attribute, this is the fallback function
  3389. * of xmlParseAttValue() when the attribute parsing requires handling
  3390. * of non-ASCII characters, or normalization compaction.
  3391. *
  3392. * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
  3393. */
  3394. static xmlChar *
  3395. xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
  3396. xmlChar limit = 0;
  3397. xmlChar *buf = NULL;
  3398. xmlChar *rep = NULL;
  3399. int len = 0;
  3400. int buf_size = 0;
  3401. int c, l, in_space = 0;
  3402. xmlChar *current = NULL;
  3403. xmlEntityPtr ent;
  3404. if (NXT(0) == '"') {
  3405. ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
  3406. limit = '"';
  3407. NEXT;
  3408. } else if (NXT(0) == '\'') {
  3409. limit = '\'';
  3410. ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
  3411. NEXT;
  3412. } else {
  3413. xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
  3414. return(NULL);
  3415. }
  3416. /*
  3417. * allocate a translation buffer.
  3418. */
  3419. buf_size = XML_PARSER_BUFFER_SIZE;
  3420. buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
  3421. if (buf == NULL) goto mem_error;
  3422. /*
  3423. * OK loop until we reach one of the ending char or a size limit.
  3424. */
  3425. c = CUR_CHAR(l);
  3426. while ((NXT(0) != limit) && /* checked */
  3427. (IS_CHAR(c)) && (c != '<')) {
  3428. if (c == 0) break;
  3429. if (c == '&') {
  3430. in_space = 0;
  3431. if (NXT(1) == '#') {
  3432. int val = xmlParseCharRef(ctxt);
  3433. if (val == '&') {
  3434. if (ctxt->replaceEntities) {
  3435. if (len > buf_size - 10) {
  3436. growBuffer(buf, 10);
  3437. }
  3438. buf[len++] = '&';
  3439. } else {
  3440. /*
  3441. * The reparsing will be done in xmlStringGetNodeList()
  3442. * called by the attribute() function in SAX.c
  3443. */
  3444. if (len > buf_size - 10) {
  3445. growBuffer(buf, 10);
  3446. }
  3447. buf[len++] = '&';
  3448. buf[len++] = '#';
  3449. buf[len++] = '3';
  3450. buf[len++] = '8';
  3451. buf[len++] = ';';
  3452. }
  3453. } else if (val != 0) {
  3454. if (len > buf_size - 10) {
  3455. growBuffer(buf, 10);
  3456. }
  3457. len += xmlCopyChar(0, &buf[len], val);
  3458. }
  3459. } else {
  3460. ent = xmlParseEntityRef(ctxt);
  3461. ctxt->nbentities++;
  3462. if (ent != NULL)
  3463. ctxt->nbentities += ent->owner;
  3464. if ((ent != NULL) &&
  3465. (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
  3466. if (len > buf_size - 10) {
  3467. growBuffer(buf, 10);
  3468. }
  3469. if ((ctxt->replaceEntities == 0) &&
  3470. (ent->content[0] == '&')) {
  3471. buf[len++] = '&';
  3472. buf[len++] = '#';
  3473. buf[len++] = '3';
  3474. buf[len++] = '8';
  3475. buf[len++] = ';';
  3476. } else {
  3477. buf[len++] = ent->content[0];
  3478. }
  3479. } else if ((ent != NULL) &&
  3480. (ctxt->replaceEntities != 0)) {
  3481. if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
  3482. rep = xmlStringDecodeEntities(ctxt, ent->content,
  3483. XML_SUBSTITUTE_REF,
  3484. 0, 0, 0);
  3485. if (rep != NULL) {
  3486. current = rep;
  3487. while (*current != 0) { /* non input consuming */
  3488. if ((*current == 0xD) || (*current == 0xA) ||
  3489. (*current == 0x9)) {
  3490. buf[len++] = 0x20;
  3491. current++;
  3492. } else
  3493. buf[len++] = *current++;
  3494. if (len > buf_size - 10) {
  3495. growBuffer(buf, 10);
  3496. }
  3497. }
  3498. xmlFree(rep);
  3499. rep = NULL;
  3500. }
  3501. } else {
  3502. if (len > buf_size - 10) {
  3503. growBuffer(buf, 10);
  3504. }
  3505. if (ent->content != NULL)
  3506. buf[len++] = ent->content[0];
  3507. }
  3508. } else if (ent != NULL) {
  3509. int i = xmlStrlen(ent->name);
  3510. const xmlChar *cur = ent->name;
  3511. /*
  3512. * This may look absurd but is needed to detect
  3513. * entities problems
  3514. */
  3515. if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
  3516. (ent->content != NULL)) {
  3517. rep = xmlStringDecodeEntities(ctxt, ent->content,
  3518. XML_SUBSTITUTE_REF, 0, 0, 0);
  3519. if (rep != NULL) {
  3520. xmlFree(rep);
  3521. rep = NULL;
  3522. }
  3523. }
  3524. /*
  3525. * Just output the reference
  3526. */
  3527. buf[len++] = '&';
  3528. while (len > buf_size - i - 10) {
  3529. growBuffer(buf, i + 10);
  3530. }
  3531. for (;i > 0;i--)
  3532. buf[len++] = *cur++;
  3533. buf[len++] = ';';
  3534. }
  3535. }
  3536. } else {
  3537. if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
  3538. if ((len != 0) || (!normalize)) {
  3539. if ((!normalize) || (!in_space)) {
  3540. COPY_BUF(l,buf,len,0x20);
  3541. while (len > buf_size - 10) {
  3542. growBuffer(buf, 10);
  3543. }
  3544. }
  3545. in_space = 1;
  3546. }
  3547. } else {
  3548. in_space = 0;
  3549. COPY_BUF(l,buf,len,c);
  3550. if (len > buf_size - 10) {
  3551. growBuffer(buf, 10);
  3552. }
  3553. }
  3554. NEXTL(l);
  3555. }
  3556. GROW;
  3557. c = CUR_CHAR(l);
  3558. }
  3559. if ((in_space) && (normalize)) {
  3560. while (buf[len - 1] == 0x20) len--;
  3561. }
  3562. buf[len] = 0;
  3563. if (RAW == '<') {
  3564. xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
  3565. } else if (RAW != limit) {
  3566. if ((c != 0) && (!IS_CHAR(c))) {
  3567. xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
  3568. "invalid character in attribute value\n");
  3569. } else {
  3570. xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
  3571. "AttValue: ' expected\n");
  3572. }
  3573. } else
  3574. NEXT;
  3575. if (attlen != NULL) *attlen = len;
  3576. return(buf);
  3577. mem_error:
  3578. xmlErrMemory(ctxt, NULL);
  3579. if (buf != NULL)
  3580. xmlFree(buf);
  3581. if (rep != NULL)
  3582. xmlFree(rep);
  3583. return(NULL);
  3584. }
  3585. /**
  3586. * xmlParseAttValue:
  3587. * @ctxt: an XML parser context
  3588. *
  3589. * parse a value for an attribute
  3590. * Note: the parser won't do substitution of entities here, this
  3591. * will be handled later in xmlStringGetNodeList
  3592. *
  3593. * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
  3594. * "'" ([^<&'] | Reference)* "'"
  3595. *
  3596. * 3.3.3 Attribute-Value Normalization:
  3597. * Before the value of an attribute is passed to the application or
  3598. * checked for validity, the XML processor must normalize it as follows:
  3599. * - a character reference is processed by appending the referenced
  3600. * character to the attribute value
  3601. * - an entity reference is processed by recursively processing the
  3602. * replacement text of the entity
  3603. * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
  3604. * appending #x20 to the normalized value, except that only a single
  3605. * #x20 is appended for a "#xD#xA" sequence that is part of an external
  3606. * parsed entity or the literal entity value of an internal parsed entity
  3607. * - other characters are processed by appending them to the normalized value
  3608. * If the declared value is not CDATA, then the XML processor must further
  3609. * process the normalized attribute value by discarding any leading and
  3610. * trailing space (#x20) characters, and by replacing sequences of space
  3611. * (#x20) characters by a single space (#x20) character.
  3612. * All attributes for which no declaration has been read should be treated
  3613. * by a non-validating parser as if declared CDATA.
  3614. *
  3615. * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
  3616. */
  3617. xmlChar *
  3618. xmlParseAttValue(xmlParserCtxtPtr ctxt) {
  3619. if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
  3620. return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
  3621. }
  3622. /**
  3623. * xmlParseSystemLiteral:
  3624. * @ctxt: an XML parser context
  3625. *
  3626. * parse an XML Literal
  3627. *
  3628. * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
  3629. *
  3630. * Returns the SystemLiteral parsed or NULL
  3631. */
  3632. xmlChar *
  3633. xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
  3634. xmlChar *buf = NULL;
  3635. int len = 0;
  3636. int size = XML_PARSER_BUFFER_SIZE;
  3637. int cur, l;
  3638. xmlChar stop;
  3639. int state = ctxt->instate;
  3640. int count = 0;
  3641. SHRINK;
  3642. if (RAW == '"') {
  3643. NEXT;
  3644. stop = '"';
  3645. } else if (RAW == '\'') {
  3646. NEXT;
  3647. stop = '\'';
  3648. } else {
  3649. xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
  3650. return(NULL);
  3651. }
  3652. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  3653. if (buf == NULL) {
  3654. xmlErrMemory(ctxt, NULL);
  3655. return(NULL);
  3656. }
  3657. ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
  3658. cur = CUR_CHAR(l);
  3659. while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
  3660. if (len + 5 >= size) {
  3661. xmlChar *tmp;
  3662. size *= 2;
  3663. tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
  3664. if (tmp == NULL) {
  3665. xmlFree(buf);
  3666. xmlErrMemory(ctxt, NULL);
  3667. ctxt->instate = (xmlParserInputState) state;
  3668. return(NULL);
  3669. }
  3670. buf = tmp;
  3671. }
  3672. count++;
  3673. if (count > 50) {
  3674. GROW;
  3675. count = 0;
  3676. }
  3677. COPY_BUF(l,buf,len,cur);
  3678. NEXTL(l);
  3679. cur = CUR_CHAR(l);
  3680. if (cur == 0) {
  3681. GROW;
  3682. SHRINK;
  3683. cur = CUR_CHAR(l);
  3684. }
  3685. }
  3686. buf[len] = 0;
  3687. ctxt->instate = (xmlParserInputState) state;
  3688. if (!IS_CHAR(cur)) {
  3689. xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
  3690. } else {
  3691. NEXT;
  3692. }
  3693. return(buf);
  3694. }
  3695. /**
  3696. * xmlParsePubidLiteral:
  3697. * @ctxt: an XML parser context
  3698. *
  3699. * parse an XML public literal
  3700. *
  3701. * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
  3702. *
  3703. * Returns the PubidLiteral parsed or NULL.
  3704. */
  3705. xmlChar *
  3706. xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
  3707. xmlChar *buf = NULL;
  3708. int len = 0;
  3709. int size = XML_PARSER_BUFFER_SIZE;
  3710. xmlChar cur;
  3711. xmlChar stop;
  3712. int count = 0;
  3713. xmlParserInputState oldstate = ctxt->instate;
  3714. SHRINK;
  3715. if (RAW == '"') {
  3716. NEXT;
  3717. stop = '"';
  3718. } else if (RAW == '\'') {
  3719. NEXT;
  3720. stop = '\'';
  3721. } else {
  3722. xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
  3723. return(NULL);
  3724. }
  3725. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  3726. if (buf == NULL) {
  3727. xmlErrMemory(ctxt, NULL);
  3728. return(NULL);
  3729. }
  3730. ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
  3731. cur = CUR;
  3732. while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
  3733. if (len + 1 >= size) {
  3734. xmlChar *tmp;
  3735. size *= 2;
  3736. tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
  3737. if (tmp == NULL) {
  3738. xmlErrMemory(ctxt, NULL);
  3739. xmlFree(buf);
  3740. return(NULL);
  3741. }
  3742. buf = tmp;
  3743. }
  3744. buf[len++] = cur;
  3745. count++;
  3746. if (count > 50) {
  3747. GROW;
  3748. count = 0;
  3749. }
  3750. NEXT;
  3751. cur = CUR;
  3752. if (cur == 0) {
  3753. GROW;
  3754. SHRINK;
  3755. cur = CUR;
  3756. }
  3757. }
  3758. buf[len] = 0;
  3759. if (cur != stop) {
  3760. xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
  3761. } else {
  3762. NEXT;
  3763. }
  3764. ctxt->instate = oldstate;
  3765. return(buf);
  3766. }
  3767. static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
  3768. /*
  3769. * used for the test in the inner loop of the char data testing
  3770. */
  3771. static const unsigned char test_char_data[256] = {
  3772. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  3773. 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
  3774. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  3775. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  3776. 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
  3777. 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
  3778. 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
  3779. 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
  3780. 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
  3781. 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
  3782. 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
  3783. 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
  3784. 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  3785. 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  3786. 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  3787. 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
  3788. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
  3789. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  3790. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  3791. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  3792. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  3793. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  3794. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  3795. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  3796. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  3797. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  3798. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  3799. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  3800. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  3801. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  3802. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  3803. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
  3804. };
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is 0 an accelerated loop scans the input buffer in place and
 * hands runs of text to the SAX characters() / ignorableWhitespace()
 * callbacks. It returns when it stops on '<' or '&', on a misplaced "]]>",
 * or when a callback moved the parser out of XML_PARSER_CONTENT. Anything
 * the loop cannot handle (and every call with @cdata set) is passed on to
 * xmlParseCharDataComplex().
 */
void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position to restore before calling the complex parser: the values at
     * entry, refreshed each time a run has been delivered to SAX.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* First try: a run made only of spaces and line feeds. */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /* Pure white space up to the next tag: deliver and return. */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    /*
                     * areBlanks() is only consulted when the two callbacks
                     * differ; otherwise everything goes to characters().
                     */
                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /*
             * General ASCII run: advance while test_char_data[] is non-zero,
             * counting columns in a local and storing them back afterwards.
             */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content; a lone ']' is plain text. */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Deliver what has been scanned since ctxt->input->cur. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                /*
                 * "\r\n": leave the cursor on the '\n', count the new line
                 * and re-evaluate the loop condition on the following byte.
                 * A lone '\r' is left for the code below.
                 */
                in++;
                if (*in == 0xA) {
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                in--;
            }
            /* Markup or reference start: the caller takes over. */
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            /* Re-read the cursor after SHRINK/GROW before going on. */
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* Slow path: restart from the last recorded line/column. */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
  3955. /**
  3956. * xmlParseCharDataComplex:
  3957. * @ctxt: an XML parser context
  3958. * @cdata: int indicating whether we are within a CDATA section
  3959. *
  3960. * parse a CharData section.this is the fallback function
  3961. * of xmlParseCharData() when the parsing requires handling
  3962. * of non-ASCII characters.
  3963. */
  3964. static void
  3965. xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
  3966. xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
  3967. int nbchar = 0;
  3968. int cur, l;
  3969. int count = 0;
  3970. SHRINK;
  3971. GROW;
  3972. cur = CUR_CHAR(l);
  3973. while ((cur != '<') && /* checked */
  3974. (cur != '&') &&
  3975. (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
  3976. if ((cur == ']') && (NXT(1) == ']') &&
  3977. (NXT(2) == '>')) {
  3978. if (cdata) break;
  3979. else {
  3980. xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
  3981. }
  3982. }
  3983. COPY_BUF(l,buf,nbchar,cur);
  3984. if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
  3985. buf[nbchar] = 0;
  3986. /*
  3987. * OK the segment is to be consumed as chars.
  3988. */
  3989. if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
  3990. if (areBlanks(ctxt, buf, nbchar, 0)) {
  3991. if (ctxt->sax->ignorableWhitespace != NULL)
  3992. ctxt->sax->ignorableWhitespace(ctxt->userData,
  3993. buf, nbchar);
  3994. } else {
  3995. if (ctxt->sax->characters != NULL)
  3996. ctxt->sax->characters(ctxt->userData, buf, nbchar);
  3997. if ((ctxt->sax->characters !=
  3998. ctxt->sax->ignorableWhitespace) &&
  3999. (*ctxt->space == -1))
  4000. *ctxt->space = -2;
  4001. }
  4002. }
  4003. nbchar = 0;
  4004. /* something really bad happened in the SAX callback */
  4005. if (ctxt->instate != XML_PARSER_CONTENT)
  4006. return;
  4007. }
  4008. count++;
  4009. if (count > 50) {
  4010. GROW;
  4011. count = 0;
  4012. }
  4013. NEXTL(l);
  4014. cur = CUR_CHAR(l);
  4015. }
  4016. if (nbchar != 0) {
  4017. buf[nbchar] = 0;
  4018. /*
  4019. * OK the segment is to be consumed as chars.
  4020. */
  4021. if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
  4022. if (areBlanks(ctxt, buf, nbchar, 0)) {
  4023. if (ctxt->sax->ignorableWhitespace != NULL)
  4024. ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
  4025. } else {
  4026. if (ctxt->sax->characters != NULL)
  4027. ctxt->sax->characters(ctxt->userData, buf, nbchar);
  4028. if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
  4029. (*ctxt->space == -1))
  4030. *ctxt->space = -2;
  4031. }
  4032. }
  4033. }
  4034. if ((cur != 0) && (!IS_CHAR(cur))) {
  4035. /* Generate the error and skip the offending character */
  4036. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  4037. "PCDATA invalid Char value %d\n",
  4038. cur);
  4039. NEXTL(l);
  4040. }
  4041. }
  4042. /**
  4043. * xmlParseExternalID:
  4044. * @ctxt: an XML parser context
  4045. * @publicID: a xmlChar** receiving PubidLiteral
  4046. * @strict: indicate whether we should restrict parsing to only
  4047. * production [75], see NOTE below
  4048. *
  4049. * Parse an External ID or a Public ID
  4050. *
  4051. * NOTE: Productions [75] and [83] interact badly since [75] can generate
  4052. * 'PUBLIC' S PubidLiteral S SystemLiteral
  4053. *
  4054. * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
  4055. * | 'PUBLIC' S PubidLiteral S SystemLiteral
  4056. *
  4057. * [83] PublicID ::= 'PUBLIC' S PubidLiteral
  4058. *
  4059. * Returns the function returns SystemLiteral and in the second
  4060. * case publicID receives PubidLiteral, is strict is off
  4061. * it is possible to return NULL and have publicID set.
  4062. */
  4063. xmlChar *
  4064. xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
  4065. xmlChar *URI = NULL;
  4066. SHRINK;
  4067. *publicID = NULL;
  4068. if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
  4069. SKIP(6);
  4070. if (!IS_BLANK_CH(CUR)) {
  4071. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4072. "Space required after 'SYSTEM'\n");
  4073. }
  4074. SKIP_BLANKS;
  4075. URI = xmlParseSystemLiteral(ctxt);
  4076. if (URI == NULL) {
  4077. xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
  4078. }
  4079. } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
  4080. SKIP(6);
  4081. if (!IS_BLANK_CH(CUR)) {
  4082. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4083. "Space required after 'PUBLIC'\n");
  4084. }
  4085. SKIP_BLANKS;
  4086. *publicID = xmlParsePubidLiteral(ctxt);
  4087. if (*publicID == NULL) {
  4088. xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
  4089. }
  4090. if (strict) {
  4091. /*
  4092. * We don't handle [83] so "S SystemLiteral" is required.
  4093. */
  4094. if (!IS_BLANK_CH(CUR)) {
  4095. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4096. "Space required after the Public Identifier\n");
  4097. }
  4098. } else {
  4099. /*
  4100. * We handle [83] so we return immediately, if
  4101. * "S SystemLiteral" is not detected. From a purely parsing
  4102. * point of view that's a nice mess.
  4103. */
  4104. const xmlChar *ptr;
  4105. GROW;
  4106. ptr = CUR_PTR;
  4107. if (!IS_BLANK_CH(*ptr)) return(NULL);
  4108. while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
  4109. if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
  4110. }
  4111. SKIP_BLANKS;
  4112. URI = xmlParseSystemLiteral(ctxt);
  4113. if (URI == NULL) {
  4114. xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
  4115. }
  4116. }
  4117. return(URI);
  4118. }
  4119. /**
  4120. * xmlParseCommentComplex:
  4121. * @ctxt: an XML parser context
  4122. * @buf: the already parsed part of the buffer
  4123. * @len: number of bytes filles in the buffer
  4124. * @size: allocated size of the buffer
  4125. *
  4126. * Skip an XML (SGML) comment <!-- .... -->
  4127. * The spec says that "For compatibility, the string "--" (double-hyphen)
  4128. * must not occur within comments. "
  4129. * This is the slow routine in case the accelerator for ascii didn't work
  4130. *
  4131. * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
  4132. */
  4133. static void
  4134. xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
  4135. int q, ql;
  4136. int r, rl;
  4137. int cur, l;
  4138. int count = 0;
  4139. int inputid;
  4140. inputid = ctxt->input->id;
  4141. if (buf == NULL) {
  4142. len = 0;
  4143. size = XML_PARSER_BUFFER_SIZE;
  4144. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  4145. if (buf == NULL) {
  4146. xmlErrMemory(ctxt, NULL);
  4147. return;
  4148. }
  4149. }
  4150. GROW; /* Assure there's enough input data */
  4151. q = CUR_CHAR(ql);
  4152. if (q == 0)
  4153. goto not_terminated;
  4154. if (!IS_CHAR(q)) {
  4155. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  4156. "xmlParseComment: invalid xmlChar value %d\n",
  4157. q);
  4158. xmlFree (buf);
  4159. return;
  4160. }
  4161. NEXTL(ql);
  4162. r = CUR_CHAR(rl);
  4163. if (r == 0)
  4164. goto not_terminated;
  4165. if (!IS_CHAR(r)) {
  4166. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  4167. "xmlParseComment: invalid xmlChar value %d\n",
  4168. q);
  4169. xmlFree (buf);
  4170. return;
  4171. }
  4172. NEXTL(rl);
  4173. cur = CUR_CHAR(l);
  4174. if (cur == 0)
  4175. goto not_terminated;
  4176. while (IS_CHAR(cur) && /* checked */
  4177. ((cur != '>') ||
  4178. (r != '-') || (q != '-'))) {
  4179. if ((r == '-') && (q == '-')) {
  4180. xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
  4181. }
  4182. if (len + 5 >= size) {
  4183. xmlChar *new_buf;
  4184. size *= 2;
  4185. new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
  4186. if (new_buf == NULL) {
  4187. xmlFree (buf);
  4188. xmlErrMemory(ctxt, NULL);
  4189. return;
  4190. }
  4191. buf = new_buf;
  4192. }
  4193. COPY_BUF(ql,buf,len,q);
  4194. q = r;
  4195. ql = rl;
  4196. r = cur;
  4197. rl = l;
  4198. count++;
  4199. if (count > 50) {
  4200. GROW;
  4201. count = 0;
  4202. }
  4203. NEXTL(l);
  4204. cur = CUR_CHAR(l);
  4205. if (cur == 0) {
  4206. SHRINK;
  4207. GROW;
  4208. cur = CUR_CHAR(l);
  4209. }
  4210. }
  4211. buf[len] = 0;
  4212. if (cur == 0) {
  4213. xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
  4214. "Comment not terminated \n<!--%.50s\n", buf);
  4215. } else if (!IS_CHAR(cur)) {
  4216. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  4217. "xmlParseComment: invalid xmlChar value %d\n",
  4218. cur);
  4219. } else {
  4220. if (inputid != ctxt->input->id) {
  4221. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  4222. "Comment doesn't start and stop in the same entity\n");
  4223. }
  4224. NEXT;
  4225. if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
  4226. (!ctxt->disableSAX))
  4227. ctxt->sax->comment(ctxt->userData, buf);
  4228. }
  4229. xmlFree(buf);
  4230. return;
  4231. not_terminated:
  4232. xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
  4233. "Comment not terminated\n", NULL);
  4234. xmlFree(buf);
  4235. return;
  4236. }
  4237. /**
  4238. * xmlParseComment:
  4239. * @ctxt: an XML parser context
  4240. *
  4241. * Skip an XML (SGML) comment <!-- .... -->
  4242. * The spec says that "For compatibility, the string "--" (double-hyphen)
  4243. * must not occur within comments. "
  4244. *
  4245. * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
  4246. */
  4247. void
  4248. xmlParseComment(xmlParserCtxtPtr ctxt) {
  4249. xmlChar *buf = NULL;
  4250. int size = XML_PARSER_BUFFER_SIZE;
  4251. int len = 0;
  4252. xmlParserInputState state;
  4253. const xmlChar *in;
  4254. int nbchar = 0, ccol;
  4255. int inputid;
  4256. /*
  4257. * Check that there is a comment right here.
  4258. */
  4259. if ((RAW != '<') || (NXT(1) != '!') ||
  4260. (NXT(2) != '-') || (NXT(3) != '-')) return;
  4261. state = ctxt->instate;
  4262. ctxt->instate = XML_PARSER_COMMENT;
  4263. inputid = ctxt->input->id;
  4264. SKIP(4);
  4265. SHRINK;
  4266. GROW;
  4267. /*
  4268. * Accelerated common case where input don't need to be
  4269. * modified before passing it to the handler.
  4270. */
  4271. in = ctxt->input->cur;
  4272. do {
  4273. if (*in == 0xA) {
  4274. do {
  4275. ctxt->input->line++; ctxt->input->col = 1;
  4276. in++;
  4277. } while (*in == 0xA);
  4278. }
  4279. get_more:
  4280. ccol = ctxt->input->col;
  4281. while (((*in > '-') && (*in <= 0x7F)) ||
  4282. ((*in >= 0x20) && (*in < '-')) ||
  4283. (*in == 0x09)) {
  4284. in++;
  4285. ccol++;
  4286. }
  4287. ctxt->input->col = ccol;
  4288. if (*in == 0xA) {
  4289. do {
  4290. ctxt->input->line++; ctxt->input->col = 1;
  4291. in++;
  4292. } while (*in == 0xA);
  4293. goto get_more;
  4294. }
  4295. nbchar = in - ctxt->input->cur;
  4296. /*
  4297. * save current set of data
  4298. */
  4299. if (nbchar > 0) {
  4300. if ((ctxt->sax != NULL) &&
  4301. (ctxt->sax->comment != NULL)) {
  4302. if (buf == NULL) {
  4303. if ((*in == '-') && (in[1] == '-'))
  4304. size = nbchar + 1;
  4305. else
  4306. size = XML_PARSER_BUFFER_SIZE + nbchar;
  4307. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  4308. if (buf == NULL) {
  4309. xmlErrMemory(ctxt, NULL);
  4310. ctxt->instate = state;
  4311. return;
  4312. }
  4313. len = 0;
  4314. } else if (len + nbchar + 1 >= size) {
  4315. xmlChar *new_buf;
  4316. size += len + nbchar + XML_PARSER_BUFFER_SIZE;
  4317. new_buf = (xmlChar *) xmlRealloc(buf,
  4318. size * sizeof(xmlChar));
  4319. if (new_buf == NULL) {
  4320. xmlFree (buf);
  4321. xmlErrMemory(ctxt, NULL);
  4322. ctxt->instate = state;
  4323. return;
  4324. }
  4325. buf = new_buf;
  4326. }
  4327. memcpy(&buf[len], ctxt->input->cur, nbchar);
  4328. len += nbchar;
  4329. buf[len] = 0;
  4330. }
  4331. }
  4332. ctxt->input->cur = in;
  4333. if (*in == 0xA) {
  4334. in++;
  4335. ctxt->input->line++; ctxt->input->col = 1;
  4336. }
  4337. if (*in == 0xD) {
  4338. in++;
  4339. if (*in == 0xA) {
  4340. ctxt->input->cur = in;
  4341. in++;
  4342. ctxt->input->line++; ctxt->input->col = 1;
  4343. continue; /* while */
  4344. }
  4345. in--;
  4346. }
  4347. SHRINK;
  4348. GROW;
  4349. in = ctxt->input->cur;
  4350. if (*in == '-') {
  4351. if (in[1] == '-') {
  4352. if (in[2] == '>') {
  4353. if (ctxt->input->id != inputid) {
  4354. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  4355. "comment doesn't start and stop in the same entity\n");
  4356. }
  4357. SKIP(3);
  4358. if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
  4359. (!ctxt->disableSAX)) {
  4360. if (buf != NULL)
  4361. ctxt->sax->comment(ctxt->userData, buf);
  4362. else
  4363. ctxt->sax->comment(ctxt->userData, BAD_CAST "");
  4364. }
  4365. if (buf != NULL)
  4366. xmlFree(buf);
  4367. ctxt->instate = state;
  4368. return;
  4369. }
  4370. if (buf != NULL)
  4371. xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
  4372. "Comment not terminated \n<!--%.50s\n",
  4373. buf);
  4374. else
  4375. xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
  4376. "Comment not terminated \n", NULL);
  4377. in++;
  4378. ctxt->input->col++;
  4379. }
  4380. in++;
  4381. ctxt->input->col++;
  4382. goto get_more;
  4383. }
  4384. } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
  4385. xmlParseCommentComplex(ctxt, buf, len, size);
  4386. ctxt->instate = state;
  4387. return;
  4388. }
  4389. /**
  4390. * xmlParsePITarget:
  4391. * @ctxt: an XML parser context
  4392. *
  4393. * parse the name of a PI
  4394. *
  4395. * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
  4396. *
  4397. * Returns the PITarget name or NULL
  4398. */
  4399. const xmlChar *
  4400. xmlParsePITarget(xmlParserCtxtPtr ctxt) {
  4401. const xmlChar *name;
  4402. name = xmlParseName(ctxt);
  4403. if ((name != NULL) &&
  4404. ((name[0] == 'x') || (name[0] == 'X')) &&
  4405. ((name[1] == 'm') || (name[1] == 'M')) &&
  4406. ((name[2] == 'l') || (name[2] == 'L'))) {
  4407. int i;
  4408. if ((name[0] == 'x') && (name[1] == 'm') &&
  4409. (name[2] == 'l') && (name[3] == 0)) {
  4410. xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
  4411. "XML declaration allowed only at the start of the document\n");
  4412. return(name);
  4413. } else if (name[3] == 0) {
  4414. xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
  4415. return(name);
  4416. }
  4417. for (i = 0;;i++) {
  4418. if (xmlW3CPIs[i] == NULL) break;
  4419. if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
  4420. return(name);
  4421. }
  4422. xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
  4423. "xmlParsePITarget: invalid name prefix 'xml'\n",
  4424. NULL, NULL);
  4425. }
  4426. if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
  4427. xmlNsErr(ctxt, XML_NS_ERR_COLON,
  4428. "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
  4429. }
  4430. return(name);
  4431. }
  #ifdef LIBXML_CATALOG_ENABLED
  /**
   * xmlParseCatalogPI:
   * @ctxt:  an XML parser context
   * @catalog:  the PI value string
   *
   * Parse an XML Catalog Processing Instruction.
   *
   * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
   *
   * Occurs only if allowed by the user and if happening in the Misc
   * part of the document before any doctype informations.
   * This will add the given catalog to the parsing context in order
   * to be used if there is a resolution need further down in the document.
   *
   * The expected content is the word "catalog", an '=' sign and a quoted
   * value, with optional blanks around each part and nothing after the
   * closing quote. A missing '=' makes the function return silently;
   * every other syntax problem produces an XML_WAR_CATALOG_PI warning.
   */
  static void
  xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
      xmlChar *url = NULL;
      const xmlChar *p, *start;
      xmlChar quote;

      for (p = catalog; IS_BLANK_CH(*p); p++)
          ;
      if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
          goto error;

      for (p += 7; IS_BLANK_CH(*p); p++)
          ;
      if (*p != '=')
          return;

      for (p++; IS_BLANK_CH(*p); p++)
          ;
      quote = *p;
      if ((quote != '\'') && (quote != '"'))
          goto error;

      /* the value runs up to the matching quote */
      start = ++p;
      while ((*p != 0) && (*p != quote))
          p++;
      if (*p == 0)
          goto error;
      url = xmlStrndup(start, p - start);

      /* only blanks may follow the closing quote */
      for (p++; IS_BLANK_CH(*p); p++)
          ;
      if (*p != 0)
          goto error;

      if (url != NULL) {
          ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
          xmlFree(url);
      }
      return;

  error:
      xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                    "Catalog PI syntax error: %s\n",
                    catalog, NULL);
      if (url != NULL)
          xmlFree(url);
  }
  #endif
  4489. /**
  4490. * xmlParsePI:
  4491. * @ctxt: an XML parser context
  4492. *
  4493. * parse an XML Processing Instruction.
  4494. *
  4495. * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
  4496. *
  4497. * The processing is transfered to SAX once parsed.
  4498. */
  4499. void
  4500. xmlParsePI(xmlParserCtxtPtr ctxt) {
  4501. xmlChar *buf = NULL;
  4502. int len = 0;
  4503. int size = XML_PARSER_BUFFER_SIZE;
  4504. int cur, l;
  4505. const xmlChar *target;
  4506. xmlParserInputState state;
  4507. int count = 0;
  4508. if ((RAW == '<') && (NXT(1) == '?')) {
  4509. xmlParserInputPtr input = ctxt->input;
  4510. state = ctxt->instate;
  4511. ctxt->instate = XML_PARSER_PI;
  4512. /*
  4513. * this is a Processing Instruction.
  4514. */
  4515. SKIP(2);
  4516. SHRINK;
  4517. /*
  4518. * Parse the target name and check for special support like
  4519. * namespace.
  4520. */
  4521. target = xmlParsePITarget(ctxt);
  4522. if (target != NULL) {
  4523. if ((RAW == '?') && (NXT(1) == '>')) {
  4524. if (input != ctxt->input) {
  4525. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  4526. "PI declaration doesn't start and stop in the same entity\n");
  4527. }
  4528. SKIP(2);
  4529. /*
  4530. * SAX: PI detected.
  4531. */
  4532. if ((ctxt->sax) && (!ctxt->disableSAX) &&
  4533. (ctxt->sax->processingInstruction != NULL))
  4534. ctxt->sax->processingInstruction(ctxt->userData,
  4535. target, NULL);
  4536. ctxt->instate = state;
  4537. return;
  4538. }
  4539. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  4540. if (buf == NULL) {
  4541. xmlErrMemory(ctxt, NULL);
  4542. ctxt->instate = state;
  4543. return;
  4544. }
  4545. cur = CUR;
  4546. if (!IS_BLANK(cur)) {
  4547. xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
  4548. "ParsePI: PI %s space expected\n", target);
  4549. }
  4550. SKIP_BLANKS;
  4551. cur = CUR_CHAR(l);
  4552. while (IS_CHAR(cur) && /* checked */
  4553. ((cur != '?') || (NXT(1) != '>'))) {
  4554. if (len + 5 >= size) {
  4555. xmlChar *tmp;
  4556. size *= 2;
  4557. tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
  4558. if (tmp == NULL) {
  4559. xmlErrMemory(ctxt, NULL);
  4560. xmlFree(buf);
  4561. ctxt->instate = state;
  4562. return;
  4563. }
  4564. buf = tmp;
  4565. }
  4566. count++;
  4567. if (count > 50) {
  4568. GROW;
  4569. count = 0;
  4570. }
  4571. COPY_BUF(l,buf,len,cur);
  4572. NEXTL(l);
  4573. cur = CUR_CHAR(l);
  4574. if (cur == 0) {
  4575. SHRINK;
  4576. GROW;
  4577. cur = CUR_CHAR(l);
  4578. }
  4579. }
  4580. buf[len] = 0;
  4581. if (cur != '?') {
  4582. xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
  4583. "ParsePI: PI %s never end ...\n", target);
  4584. } else {
  4585. if (input != ctxt->input) {
  4586. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4587. "PI declaration doesn't start and stop in the same entity\n");
  4588. }
  4589. SKIP(2);
  4590. #ifdef LIBXML_CATALOG_ENABLED
  4591. if (((state == XML_PARSER_MISC) ||
  4592. (state == XML_PARSER_START)) &&
  4593. (xmlStrEqual(target, XML_CATALOG_PI))) {
  4594. xmlCatalogAllow allow = xmlCatalogGetDefaults();
  4595. if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
  4596. (allow == XML_CATA_ALLOW_ALL))
  4597. xmlParseCatalogPI(ctxt, buf);
  4598. }
  4599. #endif
  4600. /*
  4601. * SAX: PI detected.
  4602. */
  4603. if ((ctxt->sax) && (!ctxt->disableSAX) &&
  4604. (ctxt->sax->processingInstruction != NULL))
  4605. ctxt->sax->processingInstruction(ctxt->userData,
  4606. target, buf);
  4607. }
  4608. xmlFree(buf);
  4609. } else {
  4610. xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
  4611. }
  4612. ctxt->instate = state;
  4613. }
  4614. }
  4615. /**
  4616. * xmlParseNotationDecl:
  4617. * @ctxt: an XML parser context
  4618. *
  4619. * parse a notation declaration
  4620. *
  4621. * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
  4622. *
  4623. * Hence there is actually 3 choices:
  4624. * 'PUBLIC' S PubidLiteral
  4625. * 'PUBLIC' S PubidLiteral S SystemLiteral
  4626. * and 'SYSTEM' S SystemLiteral
  4627. *
  4628. * See the NOTE on xmlParseExternalID().
  4629. */
  4630. void
  4631. xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
  4632. const xmlChar *name;
  4633. xmlChar *Pubid;
  4634. xmlChar *Systemid;
  4635. if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
  4636. xmlParserInputPtr input = ctxt->input;
  4637. SHRINK;
  4638. SKIP(10);
  4639. if (!IS_BLANK_CH(CUR)) {
  4640. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4641. "Space required after '<!NOTATION'\n");
  4642. return;
  4643. }
  4644. SKIP_BLANKS;
  4645. name = xmlParseName(ctxt);
  4646. if (name == NULL) {
  4647. xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
  4648. return;
  4649. }
  4650. if (!IS_BLANK_CH(CUR)) {
  4651. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4652. "Space required after the NOTATION name'\n");
  4653. return;
  4654. }
  4655. if (xmlStrchr(name, ':') != NULL) {
  4656. xmlNsErr(ctxt, XML_NS_ERR_COLON,
  4657. "colon are forbidden from notation names '%s'\n",
  4658. name, NULL, NULL);
  4659. }
  4660. SKIP_BLANKS;
  4661. /*
  4662. * Parse the IDs.
  4663. */
  4664. Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
  4665. SKIP_BLANKS;
  4666. if (RAW == '>') {
  4667. if (input != ctxt->input) {
  4668. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4669. "Notation declaration doesn't start and stop in the same entity\n");
  4670. }
  4671. NEXT;
  4672. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  4673. (ctxt->sax->notationDecl != NULL))
  4674. ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
  4675. } else {
  4676. xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
  4677. }
  4678. if (Systemid != NULL) xmlFree(Systemid);
  4679. if (Pubid != NULL) xmlFree(Pubid);
  4680. }
  4681. }
  4682. /**
  4683. * xmlParseEntityDecl:
  4684. * @ctxt: an XML parser context
  4685. *
  4686. * parse <!ENTITY declarations
  4687. *
  4688. * [70] EntityDecl ::= GEDecl | PEDecl
  4689. *
  4690. * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
  4691. *
  4692. * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
  4693. *
  4694. * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
  4695. *
  4696. * [74] PEDef ::= EntityValue | ExternalID
  4697. *
  4698. * [76] NDataDecl ::= S 'NDATA' S Name
  4699. *
  4700. * [ VC: Notation Declared ]
  4701. * The Name must match the declared name of a notation.
  4702. */
  4703. void
  4704. xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
  4705. const xmlChar *name = NULL;
  4706. xmlChar *value = NULL;
  4707. xmlChar *URI = NULL, *literal = NULL;
  4708. const xmlChar *ndata = NULL;
  4709. int isParameter = 0;
  4710. xmlChar *orig = NULL;
  4711. int skipped;
  4712. /* GROW; done in the caller */
  4713. if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
  4714. xmlParserInputPtr input = ctxt->input;
  4715. SHRINK;
  4716. SKIP(8);
  4717. skipped = SKIP_BLANKS;
  4718. if (skipped == 0) {
  4719. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4720. "Space required after '<!ENTITY'\n");
  4721. }
  4722. if (RAW == '%') {
  4723. NEXT;
  4724. skipped = SKIP_BLANKS;
  4725. if (skipped == 0) {
  4726. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4727. "Space required after '%'\n");
  4728. }
  4729. isParameter = 1;
  4730. }
  4731. name = xmlParseName(ctxt);
  4732. if (name == NULL) {
  4733. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  4734. "xmlParseEntityDecl: no name\n");
  4735. return;
  4736. }
  4737. if (xmlStrchr(name, ':') != NULL) {
  4738. xmlNsErr(ctxt, XML_NS_ERR_COLON,
  4739. "colon are forbidden from entities names '%s'\n",
  4740. name, NULL, NULL);
  4741. }
  4742. skipped = SKIP_BLANKS;
  4743. if (skipped == 0) {
  4744. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4745. "Space required after the entity name\n");
  4746. }
  4747. ctxt->instate = XML_PARSER_ENTITY_DECL;
  4748. /*
  4749. * handle the various case of definitions...
  4750. */
  4751. if (isParameter) {
  4752. if ((RAW == '"') || (RAW == '\'')) {
  4753. value = xmlParseEntityValue(ctxt, &orig);
  4754. if (value) {
  4755. if ((ctxt->sax != NULL) &&
  4756. (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
  4757. ctxt->sax->entityDecl(ctxt->userData, name,
  4758. XML_INTERNAL_PARAMETER_ENTITY,
  4759. NULL, NULL, value);
  4760. }
  4761. } else {
  4762. URI = xmlParseExternalID(ctxt, &literal, 1);
  4763. if ((URI == NULL) && (literal == NULL)) {
  4764. xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
  4765. }
  4766. if (URI) {
  4767. xmlURIPtr uri;
  4768. uri = xmlParseURI((const char *) URI);
  4769. if (uri == NULL) {
  4770. xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
  4771. "Invalid URI: %s\n", URI);
  4772. /*
  4773. * This really ought to be a well formedness error
  4774. * but the XML Core WG decided otherwise c.f. issue
  4775. * E26 of the XML erratas.
  4776. */
  4777. } else {
  4778. if (uri->fragment != NULL) {
  4779. /*
  4780. * Okay this is foolish to block those but not
  4781. * invalid URIs.
  4782. */
  4783. xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
  4784. } else {
  4785. if ((ctxt->sax != NULL) &&
  4786. (!ctxt->disableSAX) &&
  4787. (ctxt->sax->entityDecl != NULL))
  4788. ctxt->sax->entityDecl(ctxt->userData, name,
  4789. XML_EXTERNAL_PARAMETER_ENTITY,
  4790. literal, URI, NULL);
  4791. }
  4792. xmlFreeURI(uri);
  4793. }
  4794. }
  4795. }
  4796. } else {
  4797. if ((RAW == '"') || (RAW == '\'')) {
  4798. value = xmlParseEntityValue(ctxt, &orig);
  4799. if ((ctxt->sax != NULL) &&
  4800. (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
  4801. ctxt->sax->entityDecl(ctxt->userData, name,
  4802. XML_INTERNAL_GENERAL_ENTITY,
  4803. NULL, NULL, value);
  4804. /*
  4805. * For expat compatibility in SAX mode.
  4806. */
  4807. if ((ctxt->myDoc == NULL) ||
  4808. (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
  4809. if (ctxt->myDoc == NULL) {
  4810. ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
  4811. if (ctxt->myDoc == NULL) {
  4812. xmlErrMemory(ctxt, "New Doc failed");
  4813. return;
  4814. }
  4815. ctxt->myDoc->properties = XML_DOC_INTERNAL;
  4816. }
  4817. if (ctxt->myDoc->intSubset == NULL)
  4818. ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
  4819. BAD_CAST "fake", NULL, NULL);
  4820. xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
  4821. NULL, NULL, value);
  4822. }
  4823. } else {
  4824. URI = xmlParseExternalID(ctxt, &literal, 1);
  4825. if ((URI == NULL) && (literal == NULL)) {
  4826. xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
  4827. }
  4828. if (URI) {
  4829. xmlURIPtr uri;
  4830. uri = xmlParseURI((const char *)URI);
  4831. if (uri == NULL) {
  4832. xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
  4833. "Invalid URI: %s\n", URI);
  4834. /*
  4835. * This really ought to be a well formedness error
  4836. * but the XML Core WG decided otherwise c.f. issue
  4837. * E26 of the XML erratas.
  4838. */
  4839. } else {
  4840. if (uri->fragment != NULL) {
  4841. /*
  4842. * Okay this is foolish to block those but not
  4843. * invalid URIs.
  4844. */
  4845. xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
  4846. }
  4847. xmlFreeURI(uri);
  4848. }
  4849. }
  4850. if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
  4851. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4852. "Space required before 'NDATA'\n");
  4853. }
  4854. SKIP_BLANKS;
  4855. if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
  4856. SKIP(5);
  4857. if (!IS_BLANK_CH(CUR)) {
  4858. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4859. "Space required after 'NDATA'\n");
  4860. }
  4861. SKIP_BLANKS;
  4862. ndata = xmlParseName(ctxt);
  4863. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  4864. (ctxt->sax->unparsedEntityDecl != NULL))
  4865. ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
  4866. literal, URI, ndata);
  4867. } else {
  4868. if ((ctxt->sax != NULL) &&
  4869. (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
  4870. ctxt->sax->entityDecl(ctxt->userData, name,
  4871. XML_EXTERNAL_GENERAL_PARSED_ENTITY,
  4872. literal, URI, NULL);
  4873. /*
  4874. * For expat compatibility in SAX mode.
  4875. * assuming the entity repalcement was asked for
  4876. */
  4877. if ((ctxt->replaceEntities != 0) &&
  4878. ((ctxt->myDoc == NULL) ||
  4879. (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
  4880. if (ctxt->myDoc == NULL) {
  4881. ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
  4882. if (ctxt->myDoc == NULL) {
  4883. xmlErrMemory(ctxt, "New Doc failed");
  4884. return;
  4885. }
  4886. ctxt->myDoc->properties = XML_DOC_INTERNAL;
  4887. }
  4888. if (ctxt->myDoc->intSubset == NULL)
  4889. ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
  4890. BAD_CAST "fake", NULL, NULL);
  4891. xmlSAX2EntityDecl(ctxt, name,
  4892. XML_EXTERNAL_GENERAL_PARSED_ENTITY,
  4893. literal, URI, NULL);
  4894. }
  4895. }
  4896. }
  4897. }
  4898. SKIP_BLANKS;
  4899. if (RAW != '>') {
  4900. xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
  4901. "xmlParseEntityDecl: entity %s not terminated\n", name);
  4902. } else {
  4903. if (input != ctxt->input) {
  4904. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  4905. "Entity declaration doesn't start and stop in the same entity\n");
  4906. }
  4907. NEXT;
  4908. }
  4909. if (orig != NULL) {
  4910. /*
  4911. * Ugly mechanism to save the raw entity value.
  4912. */
  4913. xmlEntityPtr cur = NULL;
  4914. if (isParameter) {
  4915. if ((ctxt->sax != NULL) &&
  4916. (ctxt->sax->getParameterEntity != NULL))
  4917. cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
  4918. } else {
  4919. if ((ctxt->sax != NULL) &&
  4920. (ctxt->sax->getEntity != NULL))
  4921. cur = ctxt->sax->getEntity(ctxt->userData, name);
  4922. if ((cur == NULL) && (ctxt->userData==ctxt)) {
  4923. cur = xmlSAX2GetEntity(ctxt, name);
  4924. }
  4925. }
  4926. if (cur != NULL) {
  4927. if (cur->orig != NULL)
  4928. xmlFree(orig);
  4929. else
  4930. cur->orig = orig;
  4931. } else
  4932. xmlFree(orig);
  4933. }
  4934. if (value != NULL) xmlFree(value);
  4935. if (URI != NULL) xmlFree(URI);
  4936. if (literal != NULL) xmlFree(literal);
  4937. }
  4938. }
  4939. /**
  4940. * xmlParseDefaultDecl:
  4941. * @ctxt: an XML parser context
  4942. * @value: Receive a possible fixed default value for the attribute
  4943. *
  4944. * Parse an attribute default declaration
  4945. *
  4946. * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
  4947. *
  4948. * [ VC: Required Attribute ]
  4949. * if the default declaration is the keyword #REQUIRED, then the
  4950. * attribute must be specified for all elements of the type in the
  4951. * attribute-list declaration.
  4952. *
  4953. * [ VC: Attribute Default Legal ]
  4954. * The declared default value must meet the lexical constraints of
  4955. * the declared attribute type c.f. xmlValidateAttributeDecl()
  4956. *
  4957. * [ VC: Fixed Attribute Default ]
  4958. * if an attribute has a default value declared with the #FIXED
  4959. * keyword, instances of that attribute must match the default value.
  4960. *
  4961. * [ WFC: No < in Attribute Values ]
  4962. * handled in xmlParseAttValue()
  4963. *
  4964. * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
  4965. * or XML_ATTRIBUTE_FIXED.
  4966. */
  4967. int
  4968. xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
  4969. int val;
  4970. xmlChar *ret;
  4971. *value = NULL;
  4972. if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
  4973. SKIP(9);
  4974. return(XML_ATTRIBUTE_REQUIRED);
  4975. }
  4976. if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
  4977. SKIP(8);
  4978. return(XML_ATTRIBUTE_IMPLIED);
  4979. }
  4980. val = XML_ATTRIBUTE_NONE;
  4981. if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
  4982. SKIP(6);
  4983. val = XML_ATTRIBUTE_FIXED;
  4984. if (!IS_BLANK_CH(CUR)) {
  4985. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  4986. "Space required after '#FIXED'\n");
  4987. }
  4988. SKIP_BLANKS;
  4989. }
  4990. ret = xmlParseAttValue(ctxt);
  4991. ctxt->instate = XML_PARSER_DTD;
  4992. if (ret == NULL) {
  4993. xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
  4994. "Attribute default value declaration error\n");
  4995. } else
  4996. *value = ret;
  4997. return(val);
  4998. }
  4999. /**
  5000. * xmlParseNotationType:
  5001. * @ctxt: an XML parser context
  5002. *
  5003. * parse an Notation attribute type.
  5004. *
  5005. * Note: the leading 'NOTATION' S part has already being parsed...
  5006. *
  5007. * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
  5008. *
  5009. * [ VC: Notation Attributes ]
  5010. * Values of this type must match one of the notation names included
  5011. * in the declaration; all notation names in the declaration must be declared.
  5012. *
  5013. * Returns: the notation attribute tree built while parsing
  5014. */
  5015. xmlEnumerationPtr
  5016. xmlParseNotationType(xmlParserCtxtPtr ctxt) {
  5017. const xmlChar *name;
  5018. xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
  5019. if (RAW != '(') {
  5020. xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
  5021. return(NULL);
  5022. }
  5023. SHRINK;
  5024. do {
  5025. NEXT;
  5026. SKIP_BLANKS;
  5027. name = xmlParseName(ctxt);
  5028. if (name == NULL) {
  5029. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  5030. "Name expected in NOTATION declaration\n");
  5031. xmlFreeEnumeration(ret);
  5032. return(NULL);
  5033. }
  5034. tmp = ret;
  5035. while (tmp != NULL) {
  5036. if (xmlStrEqual(name, tmp->name)) {
  5037. xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
  5038. "standalone: attribute notation value token %s duplicated\n",
  5039. name, NULL);
  5040. if (!xmlDictOwns(ctxt->dict, name))
  5041. xmlFree((xmlChar *) name);
  5042. break;
  5043. }
  5044. tmp = tmp->next;
  5045. }
  5046. if (tmp == NULL) {
  5047. cur = xmlCreateEnumeration(name);
  5048. if (cur == NULL) {
  5049. xmlFreeEnumeration(ret);
  5050. return(NULL);
  5051. }
  5052. if (last == NULL) ret = last = cur;
  5053. else {
  5054. last->next = cur;
  5055. last = cur;
  5056. }
  5057. }
  5058. SKIP_BLANKS;
  5059. } while (RAW == '|');
  5060. if (RAW != ')') {
  5061. xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
  5062. xmlFreeEnumeration(ret);
  5063. return(NULL);
  5064. }
  5065. NEXT;
  5066. return(ret);
  5067. }
  5068. /**
  5069. * xmlParseEnumerationType:
  5070. * @ctxt: an XML parser context
  5071. *
  5072. * parse an Enumeration attribute type.
  5073. *
  5074. * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
  5075. *
  5076. * [ VC: Enumeration ]
  5077. * Values of this type must match one of the Nmtoken tokens in
  5078. * the declaration
  5079. *
  5080. * Returns: the enumeration attribute tree built while parsing
  5081. */
  5082. xmlEnumerationPtr
  5083. xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
  5084. xmlChar *name;
  5085. xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
  5086. if (RAW != '(') {
  5087. xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
  5088. return(NULL);
  5089. }
  5090. SHRINK;
  5091. do {
  5092. NEXT;
  5093. SKIP_BLANKS;
  5094. name = xmlParseNmtoken(ctxt);
  5095. if (name == NULL) {
  5096. xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
  5097. return(ret);
  5098. }
  5099. tmp = ret;
  5100. while (tmp != NULL) {
  5101. if (xmlStrEqual(name, tmp->name)) {
  5102. xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
  5103. "standalone: attribute enumeration value token %s duplicated\n",
  5104. name, NULL);
  5105. if (!xmlDictOwns(ctxt->dict, name))
  5106. xmlFree(name);
  5107. break;
  5108. }
  5109. tmp = tmp->next;
  5110. }
  5111. if (tmp == NULL) {
  5112. cur = xmlCreateEnumeration(name);
  5113. if (!xmlDictOwns(ctxt->dict, name))
  5114. xmlFree(name);
  5115. if (cur == NULL) {
  5116. xmlFreeEnumeration(ret);
  5117. return(NULL);
  5118. }
  5119. if (last == NULL) ret = last = cur;
  5120. else {
  5121. last->next = cur;
  5122. last = cur;
  5123. }
  5124. }
  5125. SKIP_BLANKS;
  5126. } while (RAW == '|');
  5127. if (RAW != ')') {
  5128. xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
  5129. return(ret);
  5130. }
  5131. NEXT;
  5132. return(ret);
  5133. }
  5134. /**
  5135. * xmlParseEnumeratedType:
  5136. * @ctxt: an XML parser context
  5137. * @tree: the enumeration tree built while parsing
  5138. *
  5139. * parse an Enumerated attribute type.
  5140. *
  5141. * [57] EnumeratedType ::= NotationType | Enumeration
  5142. *
  5143. * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
  5144. *
  5145. *
  5146. * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
  5147. */
  5148. int
  5149. xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
  5150. if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
  5151. SKIP(8);
  5152. if (!IS_BLANK_CH(CUR)) {
  5153. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5154. "Space required after 'NOTATION'\n");
  5155. return(0);
  5156. }
  5157. SKIP_BLANKS;
  5158. *tree = xmlParseNotationType(ctxt);
  5159. if (*tree == NULL) return(0);
  5160. return(XML_ATTRIBUTE_NOTATION);
  5161. }
  5162. *tree = xmlParseEnumerationType(ctxt);
  5163. if (*tree == NULL) return(0);
  5164. return(XML_ATTRIBUTE_ENUMERATION);
  5165. }
  5166. /**
  5167. * xmlParseAttributeType:
  5168. * @ctxt: an XML parser context
  5169. * @tree: the enumeration tree built while parsing
  5170. *
  5171. * parse the Attribute list def for an element
  5172. *
  5173. * [54] AttType ::= StringType | TokenizedType | EnumeratedType
  5174. *
  5175. * [55] StringType ::= 'CDATA'
  5176. *
  5177. * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
  5178. * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
  5179. *
  5180. * Validity constraints for attribute values syntax are checked in
  5181. * xmlValidateAttributeValue()
  5182. *
  5183. * [ VC: ID ]
  5184. * Values of type ID must match the Name production. A name must not
  5185. * appear more than once in an XML document as a value of this type;
  5186. * i.e., ID values must uniquely identify the elements which bear them.
  5187. *
  5188. * [ VC: One ID per Element Type ]
  5189. * No element type may have more than one ID attribute specified.
  5190. *
  5191. * [ VC: ID Attribute Default ]
  5192. * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
  5193. *
  5194. * [ VC: IDREF ]
  5195. * Values of type IDREF must match the Name production, and values
  5196. * of type IDREFS must match Names; each IDREF Name must match the value
  5197. * of an ID attribute on some element in the XML document; i.e. IDREF
  5198. * values must match the value of some ID attribute.
  5199. *
  5200. * [ VC: Entity Name ]
  5201. * Values of type ENTITY must match the Name production, values
  5202. * of type ENTITIES must match Names; each Entity Name must match the
  5203. * name of an unparsed entity declared in the DTD.
  5204. *
  5205. * [ VC: Name Token ]
  5206. * Values of type NMTOKEN must match the Nmtoken production; values
  5207. * of type NMTOKENS must match Nmtokens.
  5208. *
  5209. * Returns the attribute type
  5210. */
  5211. int
  5212. xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
  5213. SHRINK;
  5214. if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
  5215. SKIP(5);
  5216. return(XML_ATTRIBUTE_CDATA);
  5217. } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
  5218. SKIP(6);
  5219. return(XML_ATTRIBUTE_IDREFS);
  5220. } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
  5221. SKIP(5);
  5222. return(XML_ATTRIBUTE_IDREF);
  5223. } else if ((RAW == 'I') && (NXT(1) == 'D')) {
  5224. SKIP(2);
  5225. return(XML_ATTRIBUTE_ID);
  5226. } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
  5227. SKIP(6);
  5228. return(XML_ATTRIBUTE_ENTITY);
  5229. } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
  5230. SKIP(8);
  5231. return(XML_ATTRIBUTE_ENTITIES);
  5232. } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
  5233. SKIP(8);
  5234. return(XML_ATTRIBUTE_NMTOKENS);
  5235. } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
  5236. SKIP(7);
  5237. return(XML_ATTRIBUTE_NMTOKEN);
  5238. }
  5239. return(xmlParseEnumeratedType(ctxt, tree));
  5240. }
  5241. /**
  5242. * xmlParseAttributeListDecl:
  5243. * @ctxt: an XML parser context
  5244. *
  5245. * : parse the Attribute list def for an element
  5246. *
  5247. * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
  5248. *
  5249. * [53] AttDef ::= S Name S AttType S DefaultDecl
  5250. *
  5251. */
  5252. void
  5253. xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
  5254. const xmlChar *elemName;
  5255. const xmlChar *attrName;
  5256. xmlEnumerationPtr tree;
  5257. if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
  5258. xmlParserInputPtr input = ctxt->input;
  5259. SKIP(9);
  5260. if (!IS_BLANK_CH(CUR)) {
  5261. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5262. "Space required after '<!ATTLIST'\n");
  5263. }
  5264. SKIP_BLANKS;
  5265. elemName = xmlParseName(ctxt);
  5266. if (elemName == NULL) {
  5267. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  5268. "ATTLIST: no name for Element\n");
  5269. return;
  5270. }
  5271. SKIP_BLANKS;
  5272. GROW;
  5273. while (RAW != '>') {
  5274. const xmlChar *check = CUR_PTR;
  5275. int type;
  5276. int def;
  5277. xmlChar *defaultValue = NULL;
  5278. GROW;
  5279. tree = NULL;
  5280. attrName = xmlParseName(ctxt);
  5281. if (attrName == NULL) {
  5282. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  5283. "ATTLIST: no name for Attribute\n");
  5284. break;
  5285. }
  5286. GROW;
  5287. if (!IS_BLANK_CH(CUR)) {
  5288. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5289. "Space required after the attribute name\n");
  5290. break;
  5291. }
  5292. SKIP_BLANKS;
  5293. type = xmlParseAttributeType(ctxt, &tree);
  5294. if (type <= 0) {
  5295. break;
  5296. }
  5297. GROW;
  5298. if (!IS_BLANK_CH(CUR)) {
  5299. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5300. "Space required after the attribute type\n");
  5301. if (tree != NULL)
  5302. xmlFreeEnumeration(tree);
  5303. break;
  5304. }
  5305. SKIP_BLANKS;
  5306. def = xmlParseDefaultDecl(ctxt, &defaultValue);
  5307. if (def <= 0) {
  5308. if (defaultValue != NULL)
  5309. xmlFree(defaultValue);
  5310. if (tree != NULL)
  5311. xmlFreeEnumeration(tree);
  5312. break;
  5313. }
  5314. if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
  5315. xmlAttrNormalizeSpace(defaultValue, defaultValue);
  5316. GROW;
  5317. if (RAW != '>') {
  5318. if (!IS_BLANK_CH(CUR)) {
  5319. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5320. "Space required after the attribute default value\n");
  5321. if (defaultValue != NULL)
  5322. xmlFree(defaultValue);
  5323. if (tree != NULL)
  5324. xmlFreeEnumeration(tree);
  5325. break;
  5326. }
  5327. SKIP_BLANKS;
  5328. }
  5329. if (check == CUR_PTR) {
  5330. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  5331. "in xmlParseAttributeListDecl\n");
  5332. if (defaultValue != NULL)
  5333. xmlFree(defaultValue);
  5334. if (tree != NULL)
  5335. xmlFreeEnumeration(tree);
  5336. break;
  5337. }
  5338. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  5339. (ctxt->sax->attributeDecl != NULL))
  5340. ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
  5341. type, def, defaultValue, tree);
  5342. else if (tree != NULL)
  5343. xmlFreeEnumeration(tree);
  5344. if ((ctxt->sax2) && (defaultValue != NULL) &&
  5345. (def != XML_ATTRIBUTE_IMPLIED) &&
  5346. (def != XML_ATTRIBUTE_REQUIRED)) {
  5347. xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
  5348. }
  5349. if (ctxt->sax2) {
  5350. xmlAddSpecialAttr(ctxt, elemName, attrName, type);
  5351. }
  5352. if (defaultValue != NULL)
  5353. xmlFree(defaultValue);
  5354. GROW;
  5355. }
  5356. if (RAW == '>') {
  5357. if (input != ctxt->input) {
  5358. xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
  5359. "Attribute list declaration doesn't start and stop in the same entity\n",
  5360. NULL, NULL);
  5361. }
  5362. NEXT;
  5363. }
  5364. }
  5365. }
  5366. /**
  5367. * xmlParseElementMixedContentDecl:
  5368. * @ctxt: an XML parser context
  5369. * @inputchk: the input used for the current entity, needed for boundary checks
  5370. *
  5371. * parse the declaration for a Mixed Element content
  5372. * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
  5373. *
  5374. * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
  5375. * '(' S? '#PCDATA' S? ')'
  5376. *
  5377. * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
  5378. *
  5379. * [ VC: No Duplicate Types ]
  5380. * The same name must not appear more than once in a single
  5381. * mixed-content declaration.
  5382. *
  5383. * returns: the list of the xmlElementContentPtr describing the element choices
  5384. */
  5385. xmlElementContentPtr
  5386. xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
  5387. xmlElementContentPtr ret = NULL, cur = NULL, n;
  5388. const xmlChar *elem = NULL;
  5389. GROW;
  5390. if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
  5391. SKIP(7);
  5392. SKIP_BLANKS;
  5393. SHRINK;
  5394. if (RAW == ')') {
  5395. if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
  5396. xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
  5397. "Element content declaration doesn't start and stop in the same entity\n",
  5398. NULL, NULL);
  5399. }
  5400. NEXT;
  5401. ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
  5402. if (ret == NULL)
  5403. return(NULL);
  5404. if (RAW == '*') {
  5405. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  5406. NEXT;
  5407. }
  5408. return(ret);
  5409. }
  5410. if ((RAW == '(') || (RAW == '|')) {
  5411. ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
  5412. if (ret == NULL) return(NULL);
  5413. }
  5414. while (RAW == '|') {
  5415. NEXT;
  5416. if (elem == NULL) {
  5417. ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
  5418. if (ret == NULL) return(NULL);
  5419. ret->c1 = cur;
  5420. if (cur != NULL)
  5421. cur->parent = ret;
  5422. cur = ret;
  5423. } else {
  5424. n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
  5425. if (n == NULL) return(NULL);
  5426. n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
  5427. if (n->c1 != NULL)
  5428. n->c1->parent = n;
  5429. cur->c2 = n;
  5430. if (n != NULL)
  5431. n->parent = cur;
  5432. cur = n;
  5433. }
  5434. SKIP_BLANKS;
  5435. elem = xmlParseName(ctxt);
  5436. if (elem == NULL) {
  5437. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  5438. "xmlParseElementMixedContentDecl : Name expected\n");
  5439. xmlFreeDocElementContent(ctxt->myDoc, cur);
  5440. return(NULL);
  5441. }
  5442. SKIP_BLANKS;
  5443. GROW;
  5444. }
  5445. if ((RAW == ')') && (NXT(1) == '*')) {
  5446. if (elem != NULL) {
  5447. cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
  5448. XML_ELEMENT_CONTENT_ELEMENT);
  5449. if (cur->c2 != NULL)
  5450. cur->c2->parent = cur;
  5451. }
  5452. if (ret != NULL)
  5453. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  5454. if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
  5455. xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
  5456. "Element content declaration doesn't start and stop in the same entity\n",
  5457. NULL, NULL);
  5458. }
  5459. SKIP(2);
  5460. } else {
  5461. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5462. xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
  5463. return(NULL);
  5464. }
  5465. } else {
  5466. xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
  5467. }
  5468. return(ret);
  5469. }
  5470. /**
  5471. * xmlParseElementChildrenContentDeclPriv:
  5472. * @ctxt: an XML parser context
  5473. * @inputchk: the input used for the current entity, needed for boundary checks
  5474. * @depth: the level of recursion
  5475. *
  5476. * parse the declaration for a Mixed Element content
  5477. * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
  5478. *
  5479. *
  5480. * [47] children ::= (choice | seq) ('?' | '*' | '+')?
  5481. *
  5482. * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
  5483. *
  5484. * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
  5485. *
  5486. * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
  5487. *
  5488. * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
  5489. * TODO Parameter-entity replacement text must be properly nested
  5490. * with parenthesized groups. That is to say, if either of the
  5491. * opening or closing parentheses in a choice, seq, or Mixed
  5492. * construct is contained in the replacement text for a parameter
  5493. * entity, both must be contained in the same replacement text. For
  5494. * interoperability, if a parameter-entity reference appears in a
  5495. * choice, seq, or Mixed construct, its replacement text should not
  5496. * be empty, and neither the first nor last non-blank character of
  5497. * the replacement text should be a connector (| or ,).
  5498. *
  5499. * Returns the tree of xmlElementContentPtr describing the element
  5500. * hierarchy.
  5501. */
  5502. static xmlElementContentPtr
  5503. xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
  5504. int depth) {
  5505. xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
  5506. const xmlChar *elem;
  5507. xmlChar type = 0;
  5508. if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
  5509. (depth > 2048)) {
  5510. xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
  5511. "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
  5512. depth);
  5513. return(NULL);
  5514. }
  5515. SKIP_BLANKS;
  5516. GROW;
  5517. if (RAW == '(') {
  5518. int inputid = ctxt->input->id;
  5519. /* Recurse on first child */
  5520. NEXT;
  5521. SKIP_BLANKS;
  5522. cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
  5523. depth + 1);
  5524. SKIP_BLANKS;
  5525. GROW;
  5526. } else {
  5527. elem = xmlParseName(ctxt);
  5528. if (elem == NULL) {
  5529. xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
  5530. return(NULL);
  5531. }
  5532. cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
  5533. if (cur == NULL) {
  5534. xmlErrMemory(ctxt, NULL);
  5535. return(NULL);
  5536. }
  5537. GROW;
  5538. if (RAW == '?') {
  5539. cur->ocur = XML_ELEMENT_CONTENT_OPT;
  5540. NEXT;
  5541. } else if (RAW == '*') {
  5542. cur->ocur = XML_ELEMENT_CONTENT_MULT;
  5543. NEXT;
  5544. } else if (RAW == '+') {
  5545. cur->ocur = XML_ELEMENT_CONTENT_PLUS;
  5546. NEXT;
  5547. } else {
  5548. cur->ocur = XML_ELEMENT_CONTENT_ONCE;
  5549. }
  5550. GROW;
  5551. }
  5552. SKIP_BLANKS;
  5553. SHRINK;
  5554. while (RAW != ')') {
  5555. /*
  5556. * Each loop we parse one separator and one element.
  5557. */
  5558. if (RAW == ',') {
  5559. if (type == 0) type = CUR;
  5560. /*
  5561. * Detect "Name | Name , Name" error
  5562. */
  5563. else if (type != CUR) {
  5564. xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
  5565. "xmlParseElementChildrenContentDecl : '%c' expected\n",
  5566. type);
  5567. if ((last != NULL) && (last != ret))
  5568. xmlFreeDocElementContent(ctxt->myDoc, last);
  5569. if (ret != NULL)
  5570. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5571. return(NULL);
  5572. }
  5573. NEXT;
  5574. op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
  5575. if (op == NULL) {
  5576. if ((last != NULL) && (last != ret))
  5577. xmlFreeDocElementContent(ctxt->myDoc, last);
  5578. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5579. return(NULL);
  5580. }
  5581. if (last == NULL) {
  5582. op->c1 = ret;
  5583. if (ret != NULL)
  5584. ret->parent = op;
  5585. ret = cur = op;
  5586. } else {
  5587. cur->c2 = op;
  5588. if (op != NULL)
  5589. op->parent = cur;
  5590. op->c1 = last;
  5591. if (last != NULL)
  5592. last->parent = op;
  5593. cur =op;
  5594. last = NULL;
  5595. }
  5596. } else if (RAW == '|') {
  5597. if (type == 0) type = CUR;
  5598. /*
  5599. * Detect "Name , Name | Name" error
  5600. */
  5601. else if (type != CUR) {
  5602. xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
  5603. "xmlParseElementChildrenContentDecl : '%c' expected\n",
  5604. type);
  5605. if ((last != NULL) && (last != ret))
  5606. xmlFreeDocElementContent(ctxt->myDoc, last);
  5607. if (ret != NULL)
  5608. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5609. return(NULL);
  5610. }
  5611. NEXT;
  5612. op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
  5613. if (op == NULL) {
  5614. if ((last != NULL) && (last != ret))
  5615. xmlFreeDocElementContent(ctxt->myDoc, last);
  5616. if (ret != NULL)
  5617. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5618. return(NULL);
  5619. }
  5620. if (last == NULL) {
  5621. op->c1 = ret;
  5622. if (ret != NULL)
  5623. ret->parent = op;
  5624. ret = cur = op;
  5625. } else {
  5626. cur->c2 = op;
  5627. if (op != NULL)
  5628. op->parent = cur;
  5629. op->c1 = last;
  5630. if (last != NULL)
  5631. last->parent = op;
  5632. cur =op;
  5633. last = NULL;
  5634. }
  5635. } else {
  5636. xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
  5637. if ((last != NULL) && (last != ret))
  5638. xmlFreeDocElementContent(ctxt->myDoc, last);
  5639. if (ret != NULL)
  5640. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5641. return(NULL);
  5642. }
  5643. GROW;
  5644. SKIP_BLANKS;
  5645. GROW;
  5646. if (RAW == '(') {
  5647. int inputid = ctxt->input->id;
  5648. /* Recurse on second child */
  5649. NEXT;
  5650. SKIP_BLANKS;
  5651. last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
  5652. depth + 1);
  5653. SKIP_BLANKS;
  5654. } else {
  5655. elem = xmlParseName(ctxt);
  5656. if (elem == NULL) {
  5657. xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
  5658. if (ret != NULL)
  5659. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5660. return(NULL);
  5661. }
  5662. last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
  5663. if (last == NULL) {
  5664. if (ret != NULL)
  5665. xmlFreeDocElementContent(ctxt->myDoc, ret);
  5666. return(NULL);
  5667. }
  5668. if (RAW == '?') {
  5669. last->ocur = XML_ELEMENT_CONTENT_OPT;
  5670. NEXT;
  5671. } else if (RAW == '*') {
  5672. last->ocur = XML_ELEMENT_CONTENT_MULT;
  5673. NEXT;
  5674. } else if (RAW == '+') {
  5675. last->ocur = XML_ELEMENT_CONTENT_PLUS;
  5676. NEXT;
  5677. } else {
  5678. last->ocur = XML_ELEMENT_CONTENT_ONCE;
  5679. }
  5680. }
  5681. SKIP_BLANKS;
  5682. GROW;
  5683. }
  5684. if ((cur != NULL) && (last != NULL)) {
  5685. cur->c2 = last;
  5686. if (last != NULL)
  5687. last->parent = cur;
  5688. }
  5689. if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
  5690. xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
  5691. "Element content declaration doesn't start and stop in the same entity\n",
  5692. NULL, NULL);
  5693. }
  5694. NEXT;
  5695. if (RAW == '?') {
  5696. if (ret != NULL) {
  5697. if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
  5698. (ret->ocur == XML_ELEMENT_CONTENT_MULT))
  5699. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  5700. else
  5701. ret->ocur = XML_ELEMENT_CONTENT_OPT;
  5702. }
  5703. NEXT;
  5704. } else if (RAW == '*') {
  5705. if (ret != NULL) {
  5706. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  5707. cur = ret;
  5708. /*
  5709. * Some normalization:
  5710. * (a | b* | c?)* == (a | b | c)*
  5711. */
  5712. while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
  5713. if ((cur->c1 != NULL) &&
  5714. ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
  5715. (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
  5716. cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
  5717. if ((cur->c2 != NULL) &&
  5718. ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
  5719. (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
  5720. cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
  5721. cur = cur->c2;
  5722. }
  5723. }
  5724. NEXT;
  5725. } else if (RAW == '+') {
  5726. if (ret != NULL) {
  5727. int found = 0;
  5728. if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
  5729. (ret->ocur == XML_ELEMENT_CONTENT_MULT))
  5730. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  5731. else
  5732. ret->ocur = XML_ELEMENT_CONTENT_PLUS;
  5733. /*
  5734. * Some normalization:
  5735. * (a | b*)+ == (a | b)*
  5736. * (a | b?)+ == (a | b)*
  5737. */
  5738. while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
  5739. if ((cur->c1 != NULL) &&
  5740. ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
  5741. (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
  5742. cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
  5743. found = 1;
  5744. }
  5745. if ((cur->c2 != NULL) &&
  5746. ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
  5747. (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
  5748. cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
  5749. found = 1;
  5750. }
  5751. cur = cur->c2;
  5752. }
  5753. if (found)
  5754. ret->ocur = XML_ELEMENT_CONTENT_MULT;
  5755. }
  5756. NEXT;
  5757. }
  5758. return(ret);
  5759. }
  5760. /**
  5761. * xmlParseElementChildrenContentDecl:
  5762. * @ctxt: an XML parser context
  5763. * @inputchk: the input used for the current entity, needed for boundary checks
  5764. *
  5765. * parse the declaration for a Mixed Element content
  5766. * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
  5767. *
  5768. * [47] children ::= (choice | seq) ('?' | '*' | '+')?
  5769. *
  5770. * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
  5771. *
  5772. * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
  5773. *
  5774. * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
  5775. *
  5776. * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
  5777. * TODO Parameter-entity replacement text must be properly nested
  5778. * with parenthesized groups. That is to say, if either of the
  5779. * opening or closing parentheses in a choice, seq, or Mixed
  5780. * construct is contained in the replacement text for a parameter
  5781. * entity, both must be contained in the same replacement text. For
  5782. * interoperability, if a parameter-entity reference appears in a
  5783. * choice, seq, or Mixed construct, its replacement text should not
  5784. * be empty, and neither the first nor last non-blank character of
  5785. * the replacement text should be a connector (| or ,).
  5786. *
  5787. * Returns the tree of xmlElementContentPtr describing the element
  5788. * hierarchy.
  5789. */
  5790. xmlElementContentPtr
  5791. xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
  5792. /* stub left for API/ABI compat */
  5793. return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
  5794. }
  5795. /**
  5796. * xmlParseElementContentDecl:
  5797. * @ctxt: an XML parser context
  5798. * @name: the name of the element being defined.
  5799. * @result: the Element Content pointer will be stored here if any
  5800. *
  5801. * parse the declaration for an Element content either Mixed or Children,
  5802. * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
  5803. *
  5804. * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
  5805. *
  5806. * returns: the type of element content XML_ELEMENT_TYPE_xxx
  5807. */
  5808. int
  5809. xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
  5810. xmlElementContentPtr *result) {
  5811. xmlElementContentPtr tree = NULL;
  5812. int inputid = ctxt->input->id;
  5813. int res;
  5814. *result = NULL;
  5815. if (RAW != '(') {
  5816. xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
  5817. "xmlParseElementContentDecl : %s '(' expected\n", name);
  5818. return(-1);
  5819. }
  5820. NEXT;
  5821. GROW;
  5822. SKIP_BLANKS;
  5823. if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
  5824. tree = xmlParseElementMixedContentDecl(ctxt, inputid);
  5825. res = XML_ELEMENT_TYPE_MIXED;
  5826. } else {
  5827. tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
  5828. res = XML_ELEMENT_TYPE_ELEMENT;
  5829. }
  5830. SKIP_BLANKS;
  5831. *result = tree;
  5832. return(res);
  5833. }
  5834. /**
  5835. * xmlParseElementDecl:
  5836. * @ctxt: an XML parser context
  5837. *
  5838. * parse an Element declaration.
  5839. *
  5840. * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
  5841. *
  5842. * [ VC: Unique Element Type Declaration ]
  5843. * No element type may be declared more than once
  5844. *
  5845. * Returns the type of the element, or -1 in case of error
  5846. */
  5847. int
  5848. xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
  5849. const xmlChar *name;
  5850. int ret = -1;
  5851. xmlElementContentPtr content = NULL;
  5852. /* GROW; done in the caller */
  5853. if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
  5854. xmlParserInputPtr input = ctxt->input;
  5855. SKIP(9);
  5856. if (!IS_BLANK_CH(CUR)) {
  5857. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5858. "Space required after 'ELEMENT'\n");
  5859. }
  5860. SKIP_BLANKS;
  5861. name = xmlParseName(ctxt);
  5862. if (name == NULL) {
  5863. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  5864. "xmlParseElementDecl: no name for Element\n");
  5865. return(-1);
  5866. }
  5867. while ((RAW == 0) && (ctxt->inputNr > 1))
  5868. xmlPopInput(ctxt);
  5869. if (!IS_BLANK_CH(CUR)) {
  5870. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  5871. "Space required after the element name\n");
  5872. }
  5873. SKIP_BLANKS;
  5874. if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
  5875. SKIP(5);
  5876. /*
  5877. * Element must always be empty.
  5878. */
  5879. ret = XML_ELEMENT_TYPE_EMPTY;
  5880. } else if ((RAW == 'A') && (NXT(1) == 'N') &&
  5881. (NXT(2) == 'Y')) {
  5882. SKIP(3);
  5883. /*
  5884. * Element is a generic container.
  5885. */
  5886. ret = XML_ELEMENT_TYPE_ANY;
  5887. } else if (RAW == '(') {
  5888. ret = xmlParseElementContentDecl(ctxt, name, &content);
  5889. } else {
  5890. /*
  5891. * [ WFC: PEs in Internal Subset ] error handling.
  5892. */
  5893. if ((RAW == '%') && (ctxt->external == 0) &&
  5894. (ctxt->inputNr == 1)) {
  5895. xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
  5896. "PEReference: forbidden within markup decl in internal subset\n");
  5897. } else {
  5898. xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
  5899. "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
  5900. }
  5901. return(-1);
  5902. }
  5903. SKIP_BLANKS;
  5904. /*
  5905. * Pop-up of finished entities.
  5906. */
  5907. while ((RAW == 0) && (ctxt->inputNr > 1))
  5908. xmlPopInput(ctxt);
  5909. SKIP_BLANKS;
  5910. if (RAW != '>') {
  5911. xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
  5912. if (content != NULL) {
  5913. xmlFreeDocElementContent(ctxt->myDoc, content);
  5914. }
  5915. } else {
  5916. if (input != ctxt->input) {
  5917. xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
  5918. "Element declaration doesn't start and stop in the same entity\n");
  5919. }
  5920. NEXT;
  5921. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  5922. (ctxt->sax->elementDecl != NULL)) {
  5923. if (content != NULL)
  5924. content->parent = NULL;
  5925. ctxt->sax->elementDecl(ctxt->userData, name, ret,
  5926. content);
  5927. if ((content != NULL) && (content->parent == NULL)) {
  5928. /*
  5929. * this is a trick: if xmlAddElementDecl is called,
  5930. * instead of copying the full tree it is plugged directly
  5931. * if called from the parser. Avoid duplicating the
  5932. * interfaces or change the API/ABI
  5933. */
  5934. xmlFreeDocElementContent(ctxt->myDoc, content);
  5935. }
  5936. } else if (content != NULL) {
  5937. xmlFreeDocElementContent(ctxt->myDoc, content);
  5938. }
  5939. }
  5940. }
  5941. return(ret);
  5942. }
  5943. /**
  5944. * xmlParseConditionalSections
  5945. * @ctxt: an XML parser context
  5946. *
  5947. * [61] conditionalSect ::= includeSect | ignoreSect
  5948. * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
  5949. * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
  5950. * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
  5951. * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
  5952. */
  5953. static void
  5954. xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
  5955. int id = ctxt->input->id;
  5956. SKIP(3);
  5957. SKIP_BLANKS;
  5958. if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
  5959. SKIP(7);
  5960. SKIP_BLANKS;
  5961. if (RAW != '[') {
  5962. xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
  5963. } else {
  5964. if (ctxt->input->id != id) {
  5965. xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
  5966. "All markup of the conditional section is not in the same entity\n",
  5967. NULL, NULL);
  5968. }
  5969. NEXT;
  5970. }
  5971. if (xmlParserDebugEntities) {
  5972. if ((ctxt->input != NULL) && (ctxt->input->filename))
  5973. xmlGenericError(xmlGenericErrorContext,
  5974. "%s(%d): ", ctxt->input->filename,
  5975. ctxt->input->line);
  5976. xmlGenericError(xmlGenericErrorContext,
  5977. "Entering INCLUDE Conditional Section\n");
  5978. }
  5979. while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
  5980. (NXT(2) != '>'))) {
  5981. const xmlChar *check = CUR_PTR;
  5982. unsigned int cons = ctxt->input->consumed;
  5983. if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
  5984. xmlParseConditionalSections(ctxt);
  5985. } else if (IS_BLANK_CH(CUR)) {
  5986. NEXT;
  5987. } else if (RAW == '%') {
  5988. xmlParsePEReference(ctxt);
  5989. } else
  5990. xmlParseMarkupDecl(ctxt);
  5991. /*
  5992. * Pop-up of finished entities.
  5993. */
  5994. while ((RAW == 0) && (ctxt->inputNr > 1))
  5995. xmlPopInput(ctxt);
  5996. if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
  5997. xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
  5998. break;
  5999. }
  6000. }
  6001. if (xmlParserDebugEntities) {
  6002. if ((ctxt->input != NULL) && (ctxt->input->filename))
  6003. xmlGenericError(xmlGenericErrorContext,
  6004. "%s(%d): ", ctxt->input->filename,
  6005. ctxt->input->line);
  6006. xmlGenericError(xmlGenericErrorContext,
  6007. "Leaving INCLUDE Conditional Section\n");
  6008. }
  6009. } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
  6010. int state;
  6011. xmlParserInputState instate;
  6012. int depth = 0;
  6013. SKIP(6);
  6014. SKIP_BLANKS;
  6015. if (RAW != '[') {
  6016. xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
  6017. } else {
  6018. if (ctxt->input->id != id) {
  6019. xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
  6020. "All markup of the conditional section is not in the same entity\n",
  6021. NULL, NULL);
  6022. }
  6023. NEXT;
  6024. }
  6025. if (xmlParserDebugEntities) {
  6026. if ((ctxt->input != NULL) && (ctxt->input->filename))
  6027. xmlGenericError(xmlGenericErrorContext,
  6028. "%s(%d): ", ctxt->input->filename,
  6029. ctxt->input->line);
  6030. xmlGenericError(xmlGenericErrorContext,
  6031. "Entering IGNORE Conditional Section\n");
  6032. }
  6033. /*
  6034. * Parse up to the end of the conditional section
  6035. * But disable SAX event generating DTD building in the meantime
  6036. */
  6037. state = ctxt->disableSAX;
  6038. instate = ctxt->instate;
  6039. if (ctxt->recovery == 0) ctxt->disableSAX = 1;
  6040. ctxt->instate = XML_PARSER_IGNORE;
  6041. while ((depth >= 0) && (RAW != 0)) {
  6042. if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
  6043. depth++;
  6044. SKIP(3);
  6045. continue;
  6046. }
  6047. if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
  6048. if (--depth >= 0) SKIP(3);
  6049. continue;
  6050. }
  6051. NEXT;
  6052. continue;
  6053. }
  6054. ctxt->disableSAX = state;
  6055. ctxt->instate = instate;
  6056. if (xmlParserDebugEntities) {
  6057. if ((ctxt->input != NULL) && (ctxt->input->filename))
  6058. xmlGenericError(xmlGenericErrorContext,
  6059. "%s(%d): ", ctxt->input->filename,
  6060. ctxt->input->line);
  6061. xmlGenericError(xmlGenericErrorContext,
  6062. "Leaving IGNORE Conditional Section\n");
  6063. }
  6064. } else {
  6065. xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
  6066. }
  6067. if (RAW == 0)
  6068. SHRINK;
  6069. if (RAW == 0) {
  6070. xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
  6071. } else {
  6072. if (ctxt->input->id != id) {
  6073. xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
  6074. "All markup of the conditional section is not in the same entity\n",
  6075. NULL, NULL);
  6076. }
  6077. SKIP(3);
  6078. }
  6079. }
  6080. /**
  6081. * xmlParseMarkupDecl:
  6082. * @ctxt: an XML parser context
  6083. *
  6084. * parse Markup declarations
  6085. *
  6086. * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
  6087. * NotationDecl | PI | Comment
  6088. *
  6089. * [ VC: Proper Declaration/PE Nesting ]
  6090. * Parameter-entity replacement text must be properly nested with
  6091. * markup declarations. That is to say, if either the first character
  6092. * or the last character of a markup declaration (markupdecl above) is
  6093. * contained in the replacement text for a parameter-entity reference,
  6094. * both must be contained in the same replacement text.
  6095. *
  6096. * [ WFC: PEs in Internal Subset ]
  6097. * In the internal DTD subset, parameter-entity references can occur
  6098. * only where markup declarations can occur, not within markup declarations.
  6099. * (This does not apply to references that occur in external parameter
  6100. * entities or to the external subset.)
  6101. */
  6102. void
  6103. xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
  6104. GROW;
  6105. if (CUR == '<') {
  6106. if (NXT(1) == '!') {
  6107. switch (NXT(2)) {
  6108. case 'E':
  6109. if (NXT(3) == 'L')
  6110. xmlParseElementDecl(ctxt);
  6111. else if (NXT(3) == 'N')
  6112. xmlParseEntityDecl(ctxt);
  6113. break;
  6114. case 'A':
  6115. xmlParseAttributeListDecl(ctxt);
  6116. break;
  6117. case 'N':
  6118. xmlParseNotationDecl(ctxt);
  6119. break;
  6120. case '-':
  6121. xmlParseComment(ctxt);
  6122. break;
  6123. default:
  6124. /* there is an error but it will be detected later */
  6125. break;
  6126. }
  6127. } else if (NXT(1) == '?') {
  6128. xmlParsePI(ctxt);
  6129. }
  6130. }
  6131. /*
  6132. * This is only for internal subset. On external entities,
  6133. * the replacement is done before parsing stage
  6134. */
  6135. if ((ctxt->external == 0) && (ctxt->inputNr == 1))
  6136. xmlParsePEReference(ctxt);
  6137. /*
  6138. * Conditional sections are allowed from entities included
  6139. * by PE References in the internal subset.
  6140. */
  6141. if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
  6142. if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
  6143. xmlParseConditionalSections(ctxt);
  6144. }
  6145. }
  6146. ctxt->instate = XML_PARSER_DTD;
  6147. }
  6148. /**
  6149. * xmlParseTextDecl:
  6150. * @ctxt: an XML parser context
  6151. *
  6152. * parse an XML declaration header for external entities
  6153. *
  6154. * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
  6155. */
  6156. void
  6157. xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
  6158. xmlChar *version;
  6159. const xmlChar *encoding;
  6160. /*
  6161. * We know that '<?xml' is here.
  6162. */
  6163. if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
  6164. SKIP(5);
  6165. } else {
  6166. xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
  6167. return;
  6168. }
  6169. if (!IS_BLANK_CH(CUR)) {
  6170. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  6171. "Space needed after '<?xml'\n");
  6172. }
  6173. SKIP_BLANKS;
  6174. /*
  6175. * We may have the VersionInfo here.
  6176. */
  6177. version = xmlParseVersionInfo(ctxt);
  6178. if (version == NULL)
  6179. version = xmlCharStrdup(XML_DEFAULT_VERSION);
  6180. else {
  6181. if (!IS_BLANK_CH(CUR)) {
  6182. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  6183. "Space needed here\n");
  6184. }
  6185. }
  6186. ctxt->input->version = version;
  6187. /*
  6188. * We must have the encoding declaration
  6189. */
  6190. encoding = xmlParseEncodingDecl(ctxt);
  6191. if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
  6192. /*
  6193. * The XML REC instructs us to stop parsing right here
  6194. */
  6195. return;
  6196. }
  6197. if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
  6198. xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
  6199. "Missing encoding in text declaration\n");
  6200. }
  6201. SKIP_BLANKS;
  6202. if ((RAW == '?') && (NXT(1) == '>')) {
  6203. SKIP(2);
  6204. } else if (RAW == '>') {
  6205. /* Deprecated old WD ... */
  6206. xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
  6207. NEXT;
  6208. } else {
  6209. xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
  6210. MOVETO_ENDTAG(CUR_PTR);
  6211. NEXT;
  6212. }
  6213. }
  6214. /**
  6215. * xmlParseExternalSubset:
  6216. * @ctxt: an XML parser context
  6217. * @ExternalID: the external identifier
  6218. * @SystemID: the system identifier (or URL)
  6219. *
  6220. * parse Markup declarations from an external subset
  6221. *
  6222. * [30] extSubset ::= textDecl? extSubsetDecl
  6223. *
  6224. * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
  6225. */
  6226. void
  6227. xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
  6228. const xmlChar *SystemID) {
  6229. xmlDetectSAX2(ctxt);
  6230. GROW;
  6231. if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
  6232. (ctxt->input->end - ctxt->input->cur >= 4)) {
  6233. xmlChar start[4];
  6234. xmlCharEncoding enc;
  6235. start[0] = RAW;
  6236. start[1] = NXT(1);
  6237. start[2] = NXT(2);
  6238. start[3] = NXT(3);
  6239. enc = xmlDetectCharEncoding(start, 4);
  6240. if (enc != XML_CHAR_ENCODING_NONE)
  6241. xmlSwitchEncoding(ctxt, enc);
  6242. }
  6243. if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
  6244. xmlParseTextDecl(ctxt);
  6245. if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
  6246. /*
  6247. * The XML REC instructs us to stop parsing right here
  6248. */
  6249. ctxt->instate = XML_PARSER_EOF;
  6250. return;
  6251. }
  6252. }
  6253. if (ctxt->myDoc == NULL) {
  6254. ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
  6255. if (ctxt->myDoc == NULL) {
  6256. xmlErrMemory(ctxt, "New Doc failed");
  6257. return;
  6258. }
  6259. ctxt->myDoc->properties = XML_DOC_INTERNAL;
  6260. }
  6261. if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
  6262. xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
  6263. ctxt->instate = XML_PARSER_DTD;
  6264. ctxt->external = 1;
  6265. while (((RAW == '<') && (NXT(1) == '?')) ||
  6266. ((RAW == '<') && (NXT(1) == '!')) ||
  6267. (RAW == '%') || IS_BLANK_CH(CUR)) {
  6268. const xmlChar *check = CUR_PTR;
  6269. unsigned int cons = ctxt->input->consumed;
  6270. GROW;
  6271. if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
  6272. xmlParseConditionalSections(ctxt);
  6273. } else if (IS_BLANK_CH(CUR)) {
  6274. NEXT;
  6275. } else if (RAW == '%') {
  6276. xmlParsePEReference(ctxt);
  6277. } else
  6278. xmlParseMarkupDecl(ctxt);
  6279. /*
  6280. * Pop-up of finished entities.
  6281. */
  6282. while ((RAW == 0) && (ctxt->inputNr > 1))
  6283. xmlPopInput(ctxt);
  6284. if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
  6285. xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
  6286. break;
  6287. }
  6288. }
  6289. if (RAW != 0) {
  6290. xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
  6291. }
  6292. }
  6293. /**
  6294. * xmlParseReference:
  6295. * @ctxt: an XML parser context
  6296. *
  6297. * parse and handle entity references in content, depending on the SAX
  6298. * interface, this may end-up in a call to character() if this is a
  6299. * CharRef, a predefined entity, if there is no reference() callback.
  6300. * or if the parser was asked to switch to that mode.
  6301. *
  6302. * [67] Reference ::= EntityRef | CharRef
  6303. */
  6304. void
  6305. xmlParseReference(xmlParserCtxtPtr ctxt) {
  6306. xmlEntityPtr ent;
  6307. xmlChar *val;
  6308. int was_checked;
  6309. xmlNodePtr list = NULL;
  6310. xmlParserErrors ret = XML_ERR_OK;
  6311. if (RAW != '&')
  6312. return;
  6313. /*
  6314. * Simple case of a CharRef
  6315. */
  6316. if (NXT(1) == '#') {
  6317. int i = 0;
  6318. xmlChar out[10];
  6319. int hex = NXT(2);
  6320. int value = xmlParseCharRef(ctxt);
  6321. if (value == 0)
  6322. return;
  6323. if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
  6324. /*
  6325. * So we are using non-UTF-8 buffers
  6326. * Check that the char fit on 8bits, if not
  6327. * generate a CharRef.
  6328. */
  6329. if (value <= 0xFF) {
  6330. out[0] = value;
  6331. out[1] = 0;
  6332. if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
  6333. (!ctxt->disableSAX))
  6334. ctxt->sax->characters(ctxt->userData, out, 1);
  6335. } else {
  6336. if ((hex == 'x') || (hex == 'X'))
  6337. snprintf((char *)out, sizeof(out), "#x%X", value);
  6338. else
  6339. snprintf((char *)out, sizeof(out), "#%d", value);
  6340. if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
  6341. (!ctxt->disableSAX))
  6342. ctxt->sax->reference(ctxt->userData, out);
  6343. }
  6344. } else {
  6345. /*
  6346. * Just encode the value in UTF-8
  6347. */
  6348. COPY_BUF(0 ,out, i, value);
  6349. out[i] = 0;
  6350. if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
  6351. (!ctxt->disableSAX))
  6352. ctxt->sax->characters(ctxt->userData, out, i);
  6353. }
  6354. return;
  6355. }
  6356. /*
  6357. * We are seeing an entity reference
  6358. */
  6359. ent = xmlParseEntityRef(ctxt);
  6360. if (ent == NULL) return;
  6361. if (!ctxt->wellFormed)
  6362. return;
  6363. was_checked = ent->checked;
  6364. /* special case of predefined entities */
  6365. if ((ent->name == NULL) ||
  6366. (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
  6367. val = ent->content;
  6368. if (val == NULL) return;
  6369. /*
  6370. * inline the entity.
  6371. */
  6372. if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
  6373. (!ctxt->disableSAX))
  6374. ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
  6375. return;
  6376. }
  6377. /*
  6378. * The first reference to the entity trigger a parsing phase
  6379. * where the ent->children is filled with the result from
  6380. * the parsing.
  6381. */
  6382. if (ent->checked == 0) {
  6383. unsigned long oldnbent = ctxt->nbentities;
  6384. /*
  6385. * This is a bit hackish but this seems the best
  6386. * way to make sure both SAX and DOM entity support
  6387. * behaves okay.
  6388. */
  6389. void *user_data;
  6390. if (ctxt->userData == ctxt)
  6391. user_data = NULL;
  6392. else
  6393. user_data = ctxt->userData;
  6394. /*
  6395. * Check that this entity is well formed
  6396. * 4.3.2: An internal general parsed entity is well-formed
  6397. * if its replacement text matches the production labeled
  6398. * content.
  6399. */
  6400. if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
  6401. ctxt->depth++;
  6402. ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
  6403. user_data, &list);
  6404. ctxt->depth--;
  6405. } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
  6406. ctxt->depth++;
  6407. ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
  6408. user_data, ctxt->depth, ent->URI,
  6409. ent->ExternalID, &list);
  6410. ctxt->depth--;
  6411. } else {
  6412. ret = XML_ERR_ENTITY_PE_INTERNAL;
  6413. xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
  6414. "invalid entity type found\n", NULL);
  6415. }
  6416. /*
  6417. * Store the number of entities needing parsing for this entity
  6418. * content and do checkings
  6419. */
  6420. ent->checked = ctxt->nbentities - oldnbent;
  6421. if (ret == XML_ERR_ENTITY_LOOP) {
  6422. xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
  6423. xmlFreeNodeList(list);
  6424. return;
  6425. }
  6426. if (xmlParserEntityCheck(ctxt, 0, ent)) {
  6427. xmlFreeNodeList(list);
  6428. return;
  6429. }
  6430. if ((ret == XML_ERR_OK) && (list != NULL)) {
  6431. if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
  6432. (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
  6433. (ent->children == NULL)) {
  6434. ent->children = list;
  6435. if (ctxt->replaceEntities) {
  6436. /*
  6437. * Prune it directly in the generated document
  6438. * except for single text nodes.
  6439. */
  6440. if (((list->type == XML_TEXT_NODE) &&
  6441. (list->next == NULL)) ||
  6442. (ctxt->parseMode == XML_PARSE_READER)) {
  6443. list->parent = (xmlNodePtr) ent;
  6444. list = NULL;
  6445. ent->owner = 1;
  6446. } else {
  6447. ent->owner = 0;
  6448. while (list != NULL) {
  6449. list->parent = (xmlNodePtr) ctxt->node;
  6450. list->doc = ctxt->myDoc;
  6451. if (list->next == NULL)
  6452. ent->last = list;
  6453. list = list->next;
  6454. }
  6455. list = ent->children;
  6456. #ifdef LIBXML_LEGACY_ENABLED
  6457. if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
  6458. xmlAddEntityReference(ent, list, NULL);
  6459. #endif /* LIBXML_LEGACY_ENABLED */
  6460. }
  6461. } else {
  6462. ent->owner = 1;
  6463. while (list != NULL) {
  6464. list->parent = (xmlNodePtr) ent;
  6465. if (list->next == NULL)
  6466. ent->last = list;
  6467. list = list->next;
  6468. }
  6469. }
  6470. } else {
  6471. xmlFreeNodeList(list);
  6472. list = NULL;
  6473. }
  6474. } else if ((ret != XML_ERR_OK) &&
  6475. (ret != XML_WAR_UNDECLARED_ENTITY)) {
  6476. xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
  6477. "Entity '%s' failed to parse\n", ent->name);
  6478. } else if (list != NULL) {
  6479. xmlFreeNodeList(list);
  6480. list = NULL;
  6481. }
  6482. if (ent->checked == 0)
  6483. ent->checked = 1;
  6484. } else if (ent->checked != 1) {
  6485. ctxt->nbentities += ent->checked;
  6486. }
  6487. /*
  6488. * Now that the entity content has been gathered
  6489. * provide it to the application, this can take different forms based
  6490. * on the parsing modes.
  6491. */
  6492. if (ent->children == NULL) {
  6493. /*
  6494. * Probably running in SAX mode and the callbacks don't
  6495. * build the entity content. So unless we already went
  6496. * though parsing for first checking go though the entity
  6497. * content to generate callbacks associated to the entity
  6498. */
  6499. if (was_checked != 0) {
  6500. void *user_data;
  6501. /*
  6502. * This is a bit hackish but this seems the best
  6503. * way to make sure both SAX and DOM entity support
  6504. * behaves okay.
  6505. */
  6506. if (ctxt->userData == ctxt)
  6507. user_data = NULL;
  6508. else
  6509. user_data = ctxt->userData;
  6510. if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
  6511. ctxt->depth++;
  6512. ret = xmlParseBalancedChunkMemoryInternal(ctxt,
  6513. ent->content, user_data, NULL);
  6514. ctxt->depth--;
  6515. } else if (ent->etype ==
  6516. XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
  6517. ctxt->depth++;
  6518. ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
  6519. ctxt->sax, user_data, ctxt->depth,
  6520. ent->URI, ent->ExternalID, NULL);
  6521. ctxt->depth--;
  6522. } else {
  6523. ret = XML_ERR_ENTITY_PE_INTERNAL;
  6524. xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
  6525. "invalid entity type found\n", NULL);
  6526. }
  6527. if (ret == XML_ERR_ENTITY_LOOP) {
  6528. xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
  6529. return;
  6530. }
  6531. }
  6532. if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
  6533. (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
  6534. /*
  6535. * Entity reference callback comes second, it's somewhat
  6536. * superfluous but a compatibility to historical behaviour
  6537. */
  6538. ctxt->sax->reference(ctxt->userData, ent->name);
  6539. }
  6540. return;
  6541. }
  6542. /*
  6543. * If we didn't get any children for the entity being built
  6544. */
  6545. if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
  6546. (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
  6547. /*
  6548. * Create a node.
  6549. */
  6550. ctxt->sax->reference(ctxt->userData, ent->name);
  6551. return;
  6552. }
  6553. if ((ctxt->replaceEntities) || (ent->children == NULL)) {
  6554. /*
  6555. * There is a problem on the handling of _private for entities
  6556. * (bug 155816): Should we copy the content of the field from
  6557. * the entity (possibly overwriting some value set by the user
  6558. * when a copy is created), should we leave it alone, or should
  6559. * we try to take care of different situations? The problem
  6560. * is exacerbated by the usage of this field by the xmlReader.
  6561. * To fix this bug, we look at _private on the created node
  6562. * and, if it's NULL, we copy in whatever was in the entity.
  6563. * If it's not NULL we leave it alone. This is somewhat of a
  6564. * hack - maybe we should have further tests to determine
  6565. * what to do.
  6566. */
  6567. if ((ctxt->node != NULL) && (ent->children != NULL)) {
  6568. /*
  6569. * Seems we are generating the DOM content, do
  6570. * a simple tree copy for all references except the first
  6571. * In the first occurrence list contains the replacement.
  6572. * progressive == 2 means we are operating on the Reader
  6573. * and since nodes are discarded we must copy all the time.
  6574. */
  6575. if (((list == NULL) && (ent->owner == 0)) ||
  6576. (ctxt->parseMode == XML_PARSE_READER)) {
  6577. xmlNodePtr nw = NULL, cur, firstChild = NULL;
  6578. /*
  6579. * when operating on a reader, the entities definitions
  6580. * are always owning the entities subtree.
  6581. if (ctxt->parseMode == XML_PARSE_READER)
  6582. ent->owner = 1;
  6583. */
  6584. cur = ent->children;
  6585. while (cur != NULL) {
  6586. nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
  6587. if (nw != NULL) {
  6588. if (nw->_private == NULL)
  6589. nw->_private = cur->_private;
  6590. if (firstChild == NULL){
  6591. firstChild = nw;
  6592. }
  6593. nw = xmlAddChild(ctxt->node, nw);
  6594. }
  6595. if (cur == ent->last) {
  6596. /*
  6597. * needed to detect some strange empty
  6598. * node cases in the reader tests
  6599. */
  6600. if ((ctxt->parseMode == XML_PARSE_READER) &&
  6601. (nw != NULL) &&
  6602. (nw->type == XML_ELEMENT_NODE) &&
  6603. (nw->children == NULL))
  6604. nw->extra = 1;
  6605. break;
  6606. }
  6607. cur = cur->next;
  6608. }
  6609. #ifdef LIBXML_LEGACY_ENABLED
  6610. if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
  6611. xmlAddEntityReference(ent, firstChild, nw);
  6612. #endif /* LIBXML_LEGACY_ENABLED */
  6613. } else if (list == NULL) {
  6614. xmlNodePtr nw = NULL, cur, next, last,
  6615. firstChild = NULL;
  6616. /*
  6617. * Copy the entity child list and make it the new
  6618. * entity child list. The goal is to make sure any
  6619. * ID or REF referenced will be the one from the
  6620. * document content and not the entity copy.
  6621. */
  6622. cur = ent->children;
  6623. ent->children = NULL;
  6624. last = ent->last;
  6625. ent->last = NULL;
  6626. while (cur != NULL) {
  6627. next = cur->next;
  6628. cur->next = NULL;
  6629. cur->parent = NULL;
  6630. nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
  6631. if (nw != NULL) {
  6632. if (nw->_private == NULL)
  6633. nw->_private = cur->_private;
  6634. if (firstChild == NULL){
  6635. firstChild = cur;
  6636. }
  6637. xmlAddChild((xmlNodePtr) ent, nw);
  6638. xmlAddChild(ctxt->node, cur);
  6639. }
  6640. if (cur == last)
  6641. break;
  6642. cur = next;
  6643. }
  6644. if (ent->owner == 0)
  6645. ent->owner = 1;
  6646. #ifdef LIBXML_LEGACY_ENABLED
  6647. if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
  6648. xmlAddEntityReference(ent, firstChild, nw);
  6649. #endif /* LIBXML_LEGACY_ENABLED */
  6650. } else {
  6651. const xmlChar *nbktext;
  6652. /*
  6653. * the name change is to avoid coalescing of the
  6654. * node with a possible previous text one which
  6655. * would make ent->children a dangling pointer
  6656. */
  6657. nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
  6658. -1);
  6659. if (ent->children->type == XML_TEXT_NODE)
  6660. ent->children->name = nbktext;
  6661. if ((ent->last != ent->children) &&
  6662. (ent->last->type == XML_TEXT_NODE))
  6663. ent->last->name = nbktext;
  6664. xmlAddChildList(ctxt->node, ent->children);
  6665. }
  6666. /*
  6667. * This is to avoid a nasty side effect, see
  6668. * characters() in SAX.c
  6669. */
  6670. ctxt->nodemem = 0;
  6671. ctxt->nodelen = 0;
  6672. return;
  6673. }
  6674. }
  6675. }
  6676. /**
  6677. * xmlParseEntityRef:
  6678. * @ctxt: an XML parser context
  6679. *
  6680. * parse ENTITY references declarations
  6681. *
  6682. * [68] EntityRef ::= '&' Name ';'
  6683. *
  6684. * [ WFC: Entity Declared ]
  6685. * In a document without any DTD, a document with only an internal DTD
  6686. * subset which contains no parameter entity references, or a document
  6687. * with "standalone='yes'", the Name given in the entity reference
  6688. * must match that in an entity declaration, except that well-formed
  6689. * documents need not declare any of the following entities: amp, lt,
  6690. * gt, apos, quot. The declaration of a parameter entity must precede
  6691. * any reference to it. Similarly, the declaration of a general entity
  6692. * must precede any reference to it which appears in a default value in an
  6693. * attribute-list declaration. Note that if entities are declared in the
  6694. * external subset or in external parameter entities, a non-validating
  6695. * processor is not obligated to read and process their declarations;
  6696. * for such documents, the rule that an entity must be declared is a
  6697. * well-formedness constraint only if standalone='yes'.
  6698. *
  6699. * [ WFC: Parsed Entity ]
  6700. * An entity reference must not contain the name of an unparsed entity
  6701. *
  6702. * Returns the xmlEntityPtr if found, or NULL otherwise.
  6703. */
  6704. xmlEntityPtr
  6705. xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
  6706. const xmlChar *name;
  6707. xmlEntityPtr ent = NULL;
  6708. GROW;
  6709. if (RAW != '&')
  6710. return(NULL);
  6711. NEXT;
  6712. name = xmlParseName(ctxt);
  6713. if (name == NULL) {
  6714. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  6715. "xmlParseEntityRef: no name\n");
  6716. return(NULL);
  6717. }
  6718. if (RAW != ';') {
  6719. xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
  6720. return(NULL);
  6721. }
  6722. NEXT;
  6723. /*
  6724. * Predefined entites override any extra definition
  6725. */
  6726. if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
  6727. ent = xmlGetPredefinedEntity(name);
  6728. if (ent != NULL)
  6729. return(ent);
  6730. }
  6731. /*
  6732. * Increate the number of entity references parsed
  6733. */
  6734. ctxt->nbentities++;
  6735. /*
  6736. * Ask first SAX for entity resolution, otherwise try the
  6737. * entities which may have stored in the parser context.
  6738. */
  6739. if (ctxt->sax != NULL) {
  6740. if (ctxt->sax->getEntity != NULL)
  6741. ent = ctxt->sax->getEntity(ctxt->userData, name);
  6742. if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
  6743. (ctxt->options & XML_PARSE_OLDSAX))
  6744. ent = xmlGetPredefinedEntity(name);
  6745. if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
  6746. (ctxt->userData==ctxt)) {
  6747. ent = xmlSAX2GetEntity(ctxt, name);
  6748. }
  6749. }
  6750. /*
  6751. * [ WFC: Entity Declared ]
  6752. * In a document without any DTD, a document with only an
  6753. * internal DTD subset which contains no parameter entity
  6754. * references, or a document with "standalone='yes'", the
  6755. * Name given in the entity reference must match that in an
  6756. * entity declaration, except that well-formed documents
  6757. * need not declare any of the following entities: amp, lt,
  6758. * gt, apos, quot.
  6759. * The declaration of a parameter entity must precede any
  6760. * reference to it.
  6761. * Similarly, the declaration of a general entity must
  6762. * precede any reference to it which appears in a default
  6763. * value in an attribute-list declaration. Note that if
  6764. * entities are declared in the external subset or in
  6765. * external parameter entities, a non-validating processor
  6766. * is not obligated to read and process their declarations;
  6767. * for such documents, the rule that an entity must be
  6768. * declared is a well-formedness constraint only if
  6769. * standalone='yes'.
  6770. */
  6771. if (ent == NULL) {
  6772. if ((ctxt->standalone == 1) ||
  6773. ((ctxt->hasExternalSubset == 0) &&
  6774. (ctxt->hasPErefs == 0))) {
  6775. xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
  6776. "Entity '%s' not defined\n", name);
  6777. } else {
  6778. xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
  6779. "Entity '%s' not defined\n", name);
  6780. if ((ctxt->inSubset == 0) &&
  6781. (ctxt->sax != NULL) &&
  6782. (ctxt->sax->reference != NULL)) {
  6783. ctxt->sax->reference(ctxt->userData, name);
  6784. }
  6785. }
  6786. ctxt->valid = 0;
  6787. }
  6788. /*
  6789. * [ WFC: Parsed Entity ]
  6790. * An entity reference must not contain the name of an
  6791. * unparsed entity
  6792. */
  6793. else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
  6794. xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
  6795. "Entity reference to unparsed entity %s\n", name);
  6796. }
  6797. /*
  6798. * [ WFC: No External Entity References ]
  6799. * Attribute values cannot contain direct or indirect
  6800. * entity references to external entities.
  6801. */
  6802. else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
  6803. (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
  6804. xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
  6805. "Attribute references external entity '%s'\n", name);
  6806. }
  6807. /*
  6808. * [ WFC: No < in Attribute Values ]
  6809. * The replacement text of any entity referred to directly or
  6810. * indirectly in an attribute value (other than "&lt;") must
  6811. * not contain a <.
  6812. */
  6813. else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
  6814. (ent != NULL) && (ent->content != NULL) &&
  6815. (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
  6816. (xmlStrchr(ent->content, '<'))) {
  6817. xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
  6818. "'<' in entity '%s' is not allowed in attributes values\n", name);
  6819. }
  6820. /*
  6821. * Internal check, no parameter entities here ...
  6822. */
  6823. else {
  6824. switch (ent->etype) {
  6825. case XML_INTERNAL_PARAMETER_ENTITY:
  6826. case XML_EXTERNAL_PARAMETER_ENTITY:
  6827. xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
  6828. "Attempt to reference the parameter entity '%s'\n",
  6829. name);
  6830. break;
  6831. default:
  6832. break;
  6833. }
  6834. }
  6835. /*
  6836. * [ WFC: No Recursion ]
  6837. * A parsed entity must not contain a recursive reference
  6838. * to itself, either directly or indirectly.
  6839. * Done somewhere else
  6840. */
  6841. return(ent);
  6842. }
  6843. /**
  6844. * xmlParseStringEntityRef:
  6845. * @ctxt: an XML parser context
  6846. * @str: a pointer to an index in the string
  6847. *
  6848. * parse ENTITY references declarations, but this version parses it from
  6849. * a string value.
  6850. *
  6851. * [68] EntityRef ::= '&' Name ';'
  6852. *
  6853. * [ WFC: Entity Declared ]
  6854. * In a document without any DTD, a document with only an internal DTD
  6855. * subset which contains no parameter entity references, or a document
  6856. * with "standalone='yes'", the Name given in the entity reference
  6857. * must match that in an entity declaration, except that well-formed
  6858. * documents need not declare any of the following entities: amp, lt,
  6859. * gt, apos, quot. The declaration of a parameter entity must precede
  6860. * any reference to it. Similarly, the declaration of a general entity
  6861. * must precede any reference to it which appears in a default value in an
  6862. * attribute-list declaration. Note that if entities are declared in the
  6863. * external subset or in external parameter entities, a non-validating
  6864. * processor is not obligated to read and process their declarations;
  6865. * for such documents, the rule that an entity must be declared is a
  6866. * well-formedness constraint only if standalone='yes'.
  6867. *
  6868. * [ WFC: Parsed Entity ]
  6869. * An entity reference must not contain the name of an unparsed entity
  6870. *
  6871. * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
  6872. * is updated to the current location in the string.
  6873. */
  6874. static xmlEntityPtr
  6875. xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
  6876. xmlChar *name;
  6877. const xmlChar *ptr;
  6878. xmlChar cur;
  6879. xmlEntityPtr ent = NULL;
  6880. if ((str == NULL) || (*str == NULL))
  6881. return(NULL);
  6882. ptr = *str;
  6883. cur = *ptr;
  6884. if (cur != '&')
  6885. return(NULL);
  6886. ptr++;
  6887. name = xmlParseStringName(ctxt, &ptr);
  6888. if (name == NULL) {
  6889. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  6890. "xmlParseStringEntityRef: no name\n");
  6891. *str = ptr;
  6892. return(NULL);
  6893. }
  6894. if (*ptr != ';') {
  6895. xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
  6896. xmlFree(name);
  6897. *str = ptr;
  6898. return(NULL);
  6899. }
  6900. ptr++;
  6901. /*
  6902. * Predefined entites override any extra definition
  6903. */
  6904. if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
  6905. ent = xmlGetPredefinedEntity(name);
  6906. if (ent != NULL) {
  6907. xmlFree(name);
  6908. *str = ptr;
  6909. return(ent);
  6910. }
  6911. }
  6912. /*
  6913. * Increate the number of entity references parsed
  6914. */
  6915. ctxt->nbentities++;
  6916. /*
  6917. * Ask first SAX for entity resolution, otherwise try the
  6918. * entities which may have stored in the parser context.
  6919. */
  6920. if (ctxt->sax != NULL) {
  6921. if (ctxt->sax->getEntity != NULL)
  6922. ent = ctxt->sax->getEntity(ctxt->userData, name);
  6923. if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
  6924. ent = xmlGetPredefinedEntity(name);
  6925. if ((ent == NULL) && (ctxt->userData==ctxt)) {
  6926. ent = xmlSAX2GetEntity(ctxt, name);
  6927. }
  6928. }
  6929. /*
  6930. * [ WFC: Entity Declared ]
  6931. * In a document without any DTD, a document with only an
  6932. * internal DTD subset which contains no parameter entity
  6933. * references, or a document with "standalone='yes'", the
  6934. * Name given in the entity reference must match that in an
  6935. * entity declaration, except that well-formed documents
  6936. * need not declare any of the following entities: amp, lt,
  6937. * gt, apos, quot.
  6938. * The declaration of a parameter entity must precede any
  6939. * reference to it.
  6940. * Similarly, the declaration of a general entity must
  6941. * precede any reference to it which appears in a default
  6942. * value in an attribute-list declaration. Note that if
  6943. * entities are declared in the external subset or in
  6944. * external parameter entities, a non-validating processor
  6945. * is not obligated to read and process their declarations;
  6946. * for such documents, the rule that an entity must be
  6947. * declared is a well-formedness constraint only if
  6948. * standalone='yes'.
  6949. */
  6950. if (ent == NULL) {
  6951. if ((ctxt->standalone == 1) ||
  6952. ((ctxt->hasExternalSubset == 0) &&
  6953. (ctxt->hasPErefs == 0))) {
  6954. xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
  6955. "Entity '%s' not defined\n", name);
  6956. } else {
  6957. xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
  6958. "Entity '%s' not defined\n",
  6959. name);
  6960. }
  6961. /* TODO ? check regressions ctxt->valid = 0; */
  6962. }
  6963. /*
  6964. * [ WFC: Parsed Entity ]
  6965. * An entity reference must not contain the name of an
  6966. * unparsed entity
  6967. */
  6968. else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
  6969. xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
  6970. "Entity reference to unparsed entity %s\n", name);
  6971. }
  6972. /*
  6973. * [ WFC: No External Entity References ]
  6974. * Attribute values cannot contain direct or indirect
  6975. * entity references to external entities.
  6976. */
  6977. else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
  6978. (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
  6979. xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
  6980. "Attribute references external entity '%s'\n", name);
  6981. }
  6982. /*
  6983. * [ WFC: No < in Attribute Values ]
  6984. * The replacement text of any entity referred to directly or
  6985. * indirectly in an attribute value (other than "&lt;") must
  6986. * not contain a <.
  6987. */
  6988. else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
  6989. (ent != NULL) && (ent->content != NULL) &&
  6990. (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
  6991. (xmlStrchr(ent->content, '<'))) {
  6992. xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
  6993. "'<' in entity '%s' is not allowed in attributes values\n",
  6994. name);
  6995. }
  6996. /*
  6997. * Internal check, no parameter entities here ...
  6998. */
  6999. else {
  7000. switch (ent->etype) {
  7001. case XML_INTERNAL_PARAMETER_ENTITY:
  7002. case XML_EXTERNAL_PARAMETER_ENTITY:
  7003. xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
  7004. "Attempt to reference the parameter entity '%s'\n",
  7005. name);
  7006. break;
  7007. default:
  7008. break;
  7009. }
  7010. }
  7011. /*
  7012. * [ WFC: No Recursion ]
  7013. * A parsed entity must not contain a recursive reference
  7014. * to itself, either directly or indirectly.
  7015. * Done somewhere else
  7016. */
  7017. xmlFree(name);
  7018. *str = ptr;
  7019. return(ent);
  7020. }
  7021. /**
  7022. * xmlParsePEReference:
  7023. * @ctxt: an XML parser context
  7024. *
  7025. * parse PEReference declarations
  7026. * The entity content is handled directly by pushing it's content as
  7027. * a new input stream.
  7028. *
  7029. * [69] PEReference ::= '%' Name ';'
  7030. *
  7031. * [ WFC: No Recursion ]
  7032. * A parsed entity must not contain a recursive
  7033. * reference to itself, either directly or indirectly.
  7034. *
  7035. * [ WFC: Entity Declared ]
  7036. * In a document without any DTD, a document with only an internal DTD
  7037. * subset which contains no parameter entity references, or a document
  7038. * with "standalone='yes'", ... ... The declaration of a parameter
  7039. * entity must precede any reference to it...
  7040. *
  7041. * [ VC: Entity Declared ]
  7042. * In a document with an external subset or external parameter entities
  7043. * with "standalone='no'", ... ... The declaration of a parameter entity
  7044. * must precede any reference to it...
  7045. *
  7046. * [ WFC: In DTD ]
  7047. * Parameter-entity references may only appear in the DTD.
  7048. * NOTE: misleading but this is handled.
  7049. */
  7050. void
  7051. xmlParsePEReference(xmlParserCtxtPtr ctxt)
  7052. {
  7053. const xmlChar *name;
  7054. xmlEntityPtr entity = NULL;
  7055. xmlParserInputPtr input;
  7056. if (RAW != '%')
  7057. return;
  7058. NEXT;
  7059. name = xmlParseName(ctxt);
  7060. if (name == NULL) {
  7061. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  7062. "xmlParsePEReference: no name\n");
  7063. return;
  7064. }
  7065. if (RAW != ';') {
  7066. xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
  7067. return;
  7068. }
  7069. NEXT;
  7070. /*
  7071. * Increate the number of entity references parsed
  7072. */
  7073. ctxt->nbentities++;
  7074. /*
  7075. * Request the entity from SAX
  7076. */
  7077. if ((ctxt->sax != NULL) &&
  7078. (ctxt->sax->getParameterEntity != NULL))
  7079. entity = ctxt->sax->getParameterEntity(ctxt->userData,
  7080. name);
  7081. if (entity == NULL) {
  7082. /*
  7083. * [ WFC: Entity Declared ]
  7084. * In a document without any DTD, a document with only an
  7085. * internal DTD subset which contains no parameter entity
  7086. * references, or a document with "standalone='yes'", ...
  7087. * ... The declaration of a parameter entity must precede
  7088. * any reference to it...
  7089. */
  7090. if ((ctxt->standalone == 1) ||
  7091. ((ctxt->hasExternalSubset == 0) &&
  7092. (ctxt->hasPErefs == 0))) {
  7093. xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
  7094. "PEReference: %%%s; not found\n",
  7095. name);
  7096. } else {
  7097. /*
  7098. * [ VC: Entity Declared ]
  7099. * In a document with an external subset or external
  7100. * parameter entities with "standalone='no'", ...
  7101. * ... The declaration of a parameter entity must
  7102. * precede any reference to it...
  7103. */
  7104. xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7105. "PEReference: %%%s; not found\n",
  7106. name, NULL);
  7107. ctxt->valid = 0;
  7108. }
  7109. } else {
  7110. /*
  7111. * Internal checking in case the entity quest barfed
  7112. */
  7113. if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
  7114. (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
  7115. xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7116. "Internal: %%%s; is not a parameter entity\n",
  7117. name, NULL);
  7118. } else if (ctxt->input->free != deallocblankswrapper) {
  7119. input = xmlNewBlanksWrapperInputStream(ctxt, entity);
  7120. if (xmlPushInput(ctxt, input) < 0)
  7121. return;
  7122. } else {
  7123. /*
  7124. * TODO !!!
  7125. * handle the extra spaces added before and after
  7126. * c.f. http://www.w3.org/TR/REC-xml#as-PE
  7127. */
  7128. input = xmlNewEntityInputStream(ctxt, entity);
  7129. if (xmlPushInput(ctxt, input) < 0)
  7130. return;
  7131. if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
  7132. (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
  7133. (IS_BLANK_CH(NXT(5)))) {
  7134. xmlParseTextDecl(ctxt);
  7135. if (ctxt->errNo ==
  7136. XML_ERR_UNSUPPORTED_ENCODING) {
  7137. /*
  7138. * The XML REC instructs us to stop parsing
  7139. * right here
  7140. */
  7141. ctxt->instate = XML_PARSER_EOF;
  7142. return;
  7143. }
  7144. }
  7145. }
  7146. }
  7147. ctxt->hasPErefs = 1;
  7148. }
  7149. /**
  7150. * xmlLoadEntityContent:
  7151. * @ctxt: an XML parser context
  7152. * @entity: an unloaded system entity
  7153. *
  7154. * Load the original content of the given system entity from the
  7155. * ExternalID/SystemID given. This is to be used for Included in Literal
  7156. * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
  7157. *
  7158. * Returns 0 in case of success and -1 in case of failure
  7159. */
  7160. static int
  7161. xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
  7162. xmlParserInputPtr input;
  7163. xmlBufferPtr buf;
  7164. int l, c;
  7165. int count = 0;
  7166. if ((ctxt == NULL) || (entity == NULL) ||
  7167. ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
  7168. (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
  7169. (entity->content != NULL)) {
  7170. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  7171. "xmlLoadEntityContent parameter error");
  7172. return(-1);
  7173. }
  7174. if (xmlParserDebugEntities)
  7175. xmlGenericError(xmlGenericErrorContext,
  7176. "Reading %s entity content input\n", entity->name);
  7177. buf = xmlBufferCreate();
  7178. if (buf == NULL) {
  7179. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  7180. "xmlLoadEntityContent parameter error");
  7181. return(-1);
  7182. }
  7183. input = xmlNewEntityInputStream(ctxt, entity);
  7184. if (input == NULL) {
  7185. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  7186. "xmlLoadEntityContent input error");
  7187. xmlBufferFree(buf);
  7188. return(-1);
  7189. }
  7190. /*
  7191. * Push the entity as the current input, read char by char
  7192. * saving to the buffer until the end of the entity or an error
  7193. */
  7194. if (xmlPushInput(ctxt, input) < 0) {
  7195. xmlBufferFree(buf);
  7196. return(-1);
  7197. }
  7198. GROW;
  7199. c = CUR_CHAR(l);
  7200. while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
  7201. (IS_CHAR(c))) {
  7202. xmlBufferAdd(buf, ctxt->input->cur, l);
  7203. if (count++ > 100) {
  7204. count = 0;
  7205. GROW;
  7206. }
  7207. NEXTL(l);
  7208. c = CUR_CHAR(l);
  7209. }
  7210. if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
  7211. xmlPopInput(ctxt);
  7212. } else if (!IS_CHAR(c)) {
  7213. xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
  7214. "xmlLoadEntityContent: invalid char value %d\n",
  7215. c);
  7216. xmlBufferFree(buf);
  7217. return(-1);
  7218. }
  7219. entity->content = buf->content;
  7220. buf->content = NULL;
  7221. xmlBufferFree(buf);
  7222. return(0);
  7223. }
  7224. /**
  7225. * xmlParseStringPEReference:
  7226. * @ctxt: an XML parser context
  7227. * @str: a pointer to an index in the string
  7228. *
  7229. * parse PEReference declarations
  7230. *
  7231. * [69] PEReference ::= '%' Name ';'
  7232. *
  7233. * [ WFC: No Recursion ]
  7234. * A parsed entity must not contain a recursive
  7235. * reference to itself, either directly or indirectly.
  7236. *
  7237. * [ WFC: Entity Declared ]
  7238. * In a document without any DTD, a document with only an internal DTD
  7239. * subset which contains no parameter entity references, or a document
  7240. * with "standalone='yes'", ... ... The declaration of a parameter
  7241. * entity must precede any reference to it...
  7242. *
  7243. * [ VC: Entity Declared ]
  7244. * In a document with an external subset or external parameter entities
  7245. * with "standalone='no'", ... ... The declaration of a parameter entity
  7246. * must precede any reference to it...
  7247. *
  7248. * [ WFC: In DTD ]
  7249. * Parameter-entity references may only appear in the DTD.
  7250. * NOTE: misleading but this is handled.
  7251. *
  7252. * Returns the string of the entity content.
  7253. * str is updated to the current value of the index
  7254. */
  7255. static xmlEntityPtr
  7256. xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
  7257. const xmlChar *ptr;
  7258. xmlChar cur;
  7259. xmlChar *name;
  7260. xmlEntityPtr entity = NULL;
  7261. if ((str == NULL) || (*str == NULL)) return(NULL);
  7262. ptr = *str;
  7263. cur = *ptr;
  7264. if (cur != '%')
  7265. return(NULL);
  7266. ptr++;
  7267. name = xmlParseStringName(ctxt, &ptr);
  7268. if (name == NULL) {
  7269. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  7270. "xmlParseStringPEReference: no name\n");
  7271. *str = ptr;
  7272. return(NULL);
  7273. }
  7274. cur = *ptr;
  7275. if (cur != ';') {
  7276. xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
  7277. xmlFree(name);
  7278. *str = ptr;
  7279. return(NULL);
  7280. }
  7281. ptr++;
  7282. /*
  7283. * Increate the number of entity references parsed
  7284. */
  7285. ctxt->nbentities++;
  7286. /*
  7287. * Request the entity from SAX
  7288. */
  7289. if ((ctxt->sax != NULL) &&
  7290. (ctxt->sax->getParameterEntity != NULL))
  7291. entity = ctxt->sax->getParameterEntity(ctxt->userData,
  7292. name);
  7293. if (entity == NULL) {
  7294. /*
  7295. * [ WFC: Entity Declared ]
  7296. * In a document without any DTD, a document with only an
  7297. * internal DTD subset which contains no parameter entity
  7298. * references, or a document with "standalone='yes'", ...
  7299. * ... The declaration of a parameter entity must precede
  7300. * any reference to it...
  7301. */
  7302. if ((ctxt->standalone == 1) ||
  7303. ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
  7304. xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
  7305. "PEReference: %%%s; not found\n", name);
  7306. } else {
  7307. /*
  7308. * [ VC: Entity Declared ]
  7309. * In a document with an external subset or external
  7310. * parameter entities with "standalone='no'", ...
  7311. * ... The declaration of a parameter entity must
  7312. * precede any reference to it...
  7313. */
  7314. xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7315. "PEReference: %%%s; not found\n",
  7316. name, NULL);
  7317. ctxt->valid = 0;
  7318. }
  7319. } else {
  7320. /*
  7321. * Internal checking in case the entity quest barfed
  7322. */
  7323. if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
  7324. (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
  7325. xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
  7326. "%%%s; is not a parameter entity\n",
  7327. name, NULL);
  7328. }
  7329. }
  7330. ctxt->hasPErefs = 1;
  7331. xmlFree(name);
  7332. *str = ptr;
  7333. return(entity);
  7334. }
  7335. /**
  7336. * xmlParseDocTypeDecl:
  7337. * @ctxt: an XML parser context
  7338. *
  7339. * parse a DOCTYPE declaration
  7340. *
  7341. * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
  7342. * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
  7343. *
  7344. * [ VC: Root Element Type ]
  7345. * The Name in the document type declaration must match the element
  7346. * type of the root element.
  7347. */
  7348. void
  7349. xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
  7350. const xmlChar *name = NULL;
  7351. xmlChar *ExternalID = NULL;
  7352. xmlChar *URI = NULL;
  7353. /*
  7354. * We know that '<!DOCTYPE' has been detected.
  7355. */
  7356. SKIP(9);
  7357. SKIP_BLANKS;
  7358. /*
  7359. * Parse the DOCTYPE name.
  7360. */
  7361. name = xmlParseName(ctxt);
  7362. if (name == NULL) {
  7363. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  7364. "xmlParseDocTypeDecl : no DOCTYPE name !\n");
  7365. }
  7366. ctxt->intSubName = name;
  7367. SKIP_BLANKS;
  7368. /*
  7369. * Check for SystemID and ExternalID
  7370. */
  7371. URI = xmlParseExternalID(ctxt, &ExternalID, 1);
  7372. if ((URI != NULL) || (ExternalID != NULL)) {
  7373. ctxt->hasExternalSubset = 1;
  7374. }
  7375. ctxt->extSubURI = URI;
  7376. ctxt->extSubSystem = ExternalID;
  7377. SKIP_BLANKS;
  7378. /*
  7379. * Create and update the internal subset.
  7380. */
  7381. if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
  7382. (!ctxt->disableSAX))
  7383. ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
  7384. /*
  7385. * Is there any internal subset declarations ?
  7386. * they are handled separately in xmlParseInternalSubset()
  7387. */
  7388. if (RAW == '[')
  7389. return;
  7390. /*
  7391. * We should be at the end of the DOCTYPE declaration.
  7392. */
  7393. if (RAW != '>') {
  7394. xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
  7395. }
  7396. NEXT;
  7397. }
  7398. /**
  7399. * xmlParseInternalSubset:
  7400. * @ctxt: an XML parser context
  7401. *
  7402. * parse the internal subset declaration
  7403. *
  7404. * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
  7405. */
  7406. static void
  7407. xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
  7408. /*
  7409. * Is there any DTD definition ?
  7410. */
  7411. if (RAW == '[') {
  7412. ctxt->instate = XML_PARSER_DTD;
  7413. NEXT;
  7414. /*
  7415. * Parse the succession of Markup declarations and
  7416. * PEReferences.
  7417. * Subsequence (markupdecl | PEReference | S)*
  7418. */
  7419. while (RAW != ']') {
  7420. const xmlChar *check = CUR_PTR;
  7421. unsigned int cons = ctxt->input->consumed;
  7422. SKIP_BLANKS;
  7423. xmlParseMarkupDecl(ctxt);
  7424. xmlParsePEReference(ctxt);
  7425. /*
  7426. * Pop-up of finished entities.
  7427. */
  7428. while ((RAW == 0) && (ctxt->inputNr > 1))
  7429. xmlPopInput(ctxt);
  7430. if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
  7431. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  7432. "xmlParseInternalSubset: error detected in Markup declaration\n");
  7433. break;
  7434. }
  7435. }
  7436. if (RAW == ']') {
  7437. NEXT;
  7438. SKIP_BLANKS;
  7439. }
  7440. }
  7441. /*
  7442. * We should be at the end of the DOCTYPE declaration.
  7443. */
  7444. if (RAW != '>') {
  7445. xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
  7446. }
  7447. NEXT;
  7448. }
  7449. #ifdef LIBXML_SAX1_ENABLED
  7450. /**
  7451. * xmlParseAttribute:
  7452. * @ctxt: an XML parser context
  7453. * @value: a xmlChar ** used to store the value of the attribute
  7454. *
  7455. * parse an attribute
  7456. *
  7457. * [41] Attribute ::= Name Eq AttValue
  7458. *
  7459. * [ WFC: No External Entity References ]
  7460. * Attribute values cannot contain direct or indirect entity references
  7461. * to external entities.
  7462. *
  7463. * [ WFC: No < in Attribute Values ]
  7464. * The replacement text of any entity referred to directly or indirectly in
  7465. * an attribute value (other than "&lt;") must not contain a <.
  7466. *
  7467. * [ VC: Attribute Value Type ]
  7468. * The attribute must have been declared; the value must be of the type
  7469. * declared for it.
  7470. *
  7471. * [25] Eq ::= S? '=' S?
  7472. *
  7473. * With namespace:
  7474. *
  7475. * [NS 11] Attribute ::= QName Eq AttValue
  7476. *
  7477. * Also the case QName == xmlns:??? is handled independently as a namespace
  7478. * definition.
  7479. *
  7480. * Returns the attribute name, and the value in *value.
  7481. */
  7482. const xmlChar *
  7483. xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
  7484. const xmlChar *name;
  7485. xmlChar *val;
  7486. *value = NULL;
  7487. GROW;
  7488. name = xmlParseName(ctxt);
  7489. if (name == NULL) {
  7490. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  7491. "error parsing attribute name\n");
  7492. return(NULL);
  7493. }
  7494. /*
  7495. * read the value
  7496. */
  7497. SKIP_BLANKS;
  7498. if (RAW == '=') {
  7499. NEXT;
  7500. SKIP_BLANKS;
  7501. val = xmlParseAttValue(ctxt);
  7502. ctxt->instate = XML_PARSER_CONTENT;
  7503. } else {
  7504. xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
  7505. "Specification mandate value for attribute %s\n", name);
  7506. return(NULL);
  7507. }
  7508. /*
  7509. * Check that xml:lang conforms to the specification
  7510. * No more registered as an error, just generate a warning now
  7511. * since this was deprecated in XML second edition
  7512. */
  7513. if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
  7514. if (!xmlCheckLanguageID(val)) {
  7515. xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
  7516. "Malformed value for xml:lang : %s\n",
  7517. val, NULL);
  7518. }
  7519. }
  7520. /*
  7521. * Check that xml:space conforms to the specification
  7522. */
  7523. if (xmlStrEqual(name, BAD_CAST "xml:space")) {
  7524. if (xmlStrEqual(val, BAD_CAST "default"))
  7525. *(ctxt->space) = 0;
  7526. else if (xmlStrEqual(val, BAD_CAST "preserve"))
  7527. *(ctxt->space) = 1;
  7528. else {
  7529. xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
  7530. "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
  7531. val, NULL);
  7532. }
  7533. }
  7534. *value = val;
  7535. return(name);
  7536. }
  7537. /**
  7538. * xmlParseStartTag:
  7539. * @ctxt: an XML parser context
  7540. *
  7541. * parse a start of tag either for rule element or
  7542. * EmptyElement. In both case we don't parse the tag closing chars.
  7543. *
  7544. * [40] STag ::= '<' Name (S Attribute)* S? '>'
  7545. *
  7546. * [ WFC: Unique Att Spec ]
  7547. * No attribute name may appear more than once in the same start-tag or
  7548. * empty-element tag.
  7549. *
  7550. * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
  7551. *
  7552. * [ WFC: Unique Att Spec ]
  7553. * No attribute name may appear more than once in the same start-tag or
  7554. * empty-element tag.
  7555. *
  7556. * With namespace:
  7557. *
  7558. * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
  7559. *
  7560. * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
  7561. *
  7562. * Returns the element name parsed
  7563. */
  7564. const xmlChar *
  7565. xmlParseStartTag(xmlParserCtxtPtr ctxt) {
  7566. const xmlChar *name;
  7567. const xmlChar *attname;
  7568. xmlChar *attvalue;
  7569. const xmlChar **atts = ctxt->atts;
  7570. int nbatts = 0;
  7571. int maxatts = ctxt->maxatts;
  7572. int i;
  7573. if (RAW != '<') return(NULL);
  7574. NEXT1;
  7575. name = xmlParseName(ctxt);
  7576. if (name == NULL) {
  7577. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  7578. "xmlParseStartTag: invalid element name\n");
  7579. return(NULL);
  7580. }
  7581. /*
  7582. * Now parse the attributes, it ends up with the ending
  7583. *
  7584. * (S Attribute)* S?
  7585. */
  7586. SKIP_BLANKS;
  7587. GROW;
  7588. while ((RAW != '>') &&
  7589. ((RAW != '/') || (NXT(1) != '>')) &&
  7590. (IS_BYTE_CHAR(RAW))) {
  7591. const xmlChar *q = CUR_PTR;
  7592. unsigned int cons = ctxt->input->consumed;
  7593. attname = xmlParseAttribute(ctxt, &attvalue);
  7594. if ((attname != NULL) && (attvalue != NULL)) {
  7595. /*
  7596. * [ WFC: Unique Att Spec ]
  7597. * No attribute name may appear more than once in the same
  7598. * start-tag or empty-element tag.
  7599. */
  7600. for (i = 0; i < nbatts;i += 2) {
  7601. if (xmlStrEqual(atts[i], attname)) {
  7602. xmlErrAttributeDup(ctxt, NULL, attname);
  7603. xmlFree(attvalue);
  7604. goto failed;
  7605. }
  7606. }
  7607. /*
  7608. * Add the pair to atts
  7609. */
  7610. if (atts == NULL) {
  7611. maxatts = 22; /* allow for 10 attrs by default */
  7612. atts = (const xmlChar **)
  7613. xmlMalloc(maxatts * sizeof(xmlChar *));
  7614. if (atts == NULL) {
  7615. xmlErrMemory(ctxt, NULL);
  7616. if (attvalue != NULL)
  7617. xmlFree(attvalue);
  7618. goto failed;
  7619. }
  7620. ctxt->atts = atts;
  7621. ctxt->maxatts = maxatts;
  7622. } else if (nbatts + 4 > maxatts) {
  7623. const xmlChar **n;
  7624. maxatts *= 2;
  7625. n = (const xmlChar **) xmlRealloc((void *) atts,
  7626. maxatts * sizeof(const xmlChar *));
  7627. if (n == NULL) {
  7628. xmlErrMemory(ctxt, NULL);
  7629. if (attvalue != NULL)
  7630. xmlFree(attvalue);
  7631. goto failed;
  7632. }
  7633. atts = n;
  7634. ctxt->atts = atts;
  7635. ctxt->maxatts = maxatts;
  7636. }
  7637. atts[nbatts++] = attname;
  7638. atts[nbatts++] = attvalue;
  7639. atts[nbatts] = NULL;
  7640. atts[nbatts + 1] = NULL;
  7641. } else {
  7642. if (attvalue != NULL)
  7643. xmlFree(attvalue);
  7644. }
  7645. failed:
  7646. GROW
  7647. if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
  7648. break;
  7649. if (!IS_BLANK_CH(RAW)) {
  7650. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  7651. "attributes construct error\n");
  7652. }
  7653. SKIP_BLANKS;
  7654. if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
  7655. (attname == NULL) && (attvalue == NULL)) {
  7656. xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
  7657. "xmlParseStartTag: problem parsing attributes\n");
  7658. break;
  7659. }
  7660. SHRINK;
  7661. GROW;
  7662. }
  7663. /*
  7664. * SAX: Start of Element !
  7665. */
  7666. if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
  7667. (!ctxt->disableSAX)) {
  7668. if (nbatts > 0)
  7669. ctxt->sax->startElement(ctxt->userData, name, atts);
  7670. else
  7671. ctxt->sax->startElement(ctxt->userData, name, NULL);
  7672. }
  7673. if (atts != NULL) {
  7674. /* Free only the content strings */
  7675. for (i = 1;i < nbatts;i+=2)
  7676. if (atts[i] != NULL)
  7677. xmlFree((xmlChar *) atts[i]);
  7678. }
  7679. return(name);
  7680. }
  7681. /**
  7682. * xmlParseEndTag1:
  7683. * @ctxt: an XML parser context
  7684. * @line: line of the start tag
  7685. * @nsNr: number of namespaces on the start tag
  7686. *
  7687. * parse an end of tag
  7688. *
  7689. * [42] ETag ::= '</' Name S? '>'
  7690. *
  7691. * With namespace
  7692. *
  7693. * [NS 9] ETag ::= '</' QName S? '>'
  7694. */
  7695. static void
  7696. xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
  7697. const xmlChar *name;
  7698. GROW;
  7699. if ((RAW != '<') || (NXT(1) != '/')) {
  7700. xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
  7701. "xmlParseEndTag: '</' not found\n");
  7702. return;
  7703. }
  7704. SKIP(2);
  7705. name = xmlParseNameAndCompare(ctxt,ctxt->name);
  7706. /*
  7707. * We should definitely be at the ending "S? '>'" part
  7708. */
  7709. GROW;
  7710. SKIP_BLANKS;
  7711. if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
  7712. xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
  7713. } else
  7714. NEXT1;
  7715. /*
  7716. * [ WFC: Element Type Match ]
  7717. * The Name in an element's end-tag must match the element type in the
  7718. * start-tag.
  7719. *
  7720. */
  7721. if (name != (xmlChar*)1) {
  7722. if (name == NULL) name = BAD_CAST "unparseable";
  7723. xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
  7724. "Opening and ending tag mismatch: %s line %d and %s\n",
  7725. ctxt->name, line, name);
  7726. }
  7727. /*
  7728. * SAX: End of Tag
  7729. */
  7730. if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
  7731. (!ctxt->disableSAX))
  7732. ctxt->sax->endElement(ctxt->userData, ctxt->name);
  7733. namePop(ctxt);
  7734. spacePop(ctxt);
  7735. return;
  7736. }
  7737. /**
  7738. * xmlParseEndTag:
  7739. * @ctxt: an XML parser context
  7740. *
  7741. * parse an end of tag
  7742. *
  7743. * [42] ETag ::= '</' Name S? '>'
  7744. *
  7745. * With namespace
  7746. *
  7747. * [NS 9] ETag ::= '</' QName S? '>'
  7748. */
  7749. void
  7750. xmlParseEndTag(xmlParserCtxtPtr ctxt) {
  7751. xmlParseEndTag1(ctxt, 0);
  7752. }
  7753. #endif /* LIBXML_SAX1_ENABLED */
  7754. /************************************************************************
  7755. * *
  7756. * SAX 2 specific operations *
  7757. * *
  7758. ************************************************************************/
  7759. /*
  7760. * xmlGetNamespace:
  7761. * @ctxt: an XML parser context
  7762. * @prefix: the prefix to lookup
  7763. *
  7764. * Lookup the namespace name for the @prefix (which ca be NULL)
  7765. * The prefix must come from the @ctxt->dict dictionnary
  7766. *
  7767. * Returns the namespace name or NULL if not bound
  7768. */
  7769. static const xmlChar *
  7770. xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
  7771. int i;
  7772. if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
  7773. for (i = ctxt->nsNr - 2;i >= 0;i-=2)
  7774. if (ctxt->nsTab[i] == prefix) {
  7775. if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
  7776. return(NULL);
  7777. return(ctxt->nsTab[i + 1]);
  7778. }
  7779. return(NULL);
  7780. }
  7781. /**
  7782. * xmlParseQName:
  7783. * @ctxt: an XML parser context
  7784. * @prefix: pointer to store the prefix part
  7785. *
  7786. * parse an XML Namespace QName
  7787. *
  7788. * [6] QName ::= (Prefix ':')? LocalPart
  7789. * [7] Prefix ::= NCName
  7790. * [8] LocalPart ::= NCName
  7791. *
  7792. * Returns the Name parsed or NULL
  7793. */
  7794. static const xmlChar *
  7795. xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
  7796. const xmlChar *l, *p;
  7797. GROW;
  7798. l = xmlParseNCName(ctxt);
  7799. if (l == NULL) {
  7800. if (CUR == ':') {
  7801. l = xmlParseName(ctxt);
  7802. if (l != NULL) {
  7803. xmlNsErr(ctxt, XML_NS_ERR_QNAME,
  7804. "Failed to parse QName '%s'\n", l, NULL, NULL);
  7805. *prefix = NULL;
  7806. return(l);
  7807. }
  7808. }
  7809. return(NULL);
  7810. }
  7811. if (CUR == ':') {
  7812. NEXT;
  7813. p = l;
  7814. l = xmlParseNCName(ctxt);
  7815. if (l == NULL) {
  7816. xmlChar *tmp;
  7817. xmlNsErr(ctxt, XML_NS_ERR_QNAME,
  7818. "Failed to parse QName '%s:'\n", p, NULL, NULL);
  7819. l = xmlParseNmtoken(ctxt);
  7820. if (l == NULL)
  7821. tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
  7822. else {
  7823. tmp = xmlBuildQName(l, p, NULL, 0);
  7824. xmlFree((char *)l);
  7825. }
  7826. p = xmlDictLookup(ctxt->dict, tmp, -1);
  7827. if (tmp != NULL) xmlFree(tmp);
  7828. *prefix = NULL;
  7829. return(p);
  7830. }
  7831. if (CUR == ':') {
  7832. xmlChar *tmp;
  7833. xmlNsErr(ctxt, XML_NS_ERR_QNAME,
  7834. "Failed to parse QName '%s:%s:'\n", p, l, NULL);
  7835. NEXT;
  7836. tmp = (xmlChar *) xmlParseName(ctxt);
  7837. if (tmp != NULL) {
  7838. tmp = xmlBuildQName(tmp, l, NULL, 0);
  7839. l = xmlDictLookup(ctxt->dict, tmp, -1);
  7840. if (tmp != NULL) xmlFree(tmp);
  7841. *prefix = p;
  7842. return(l);
  7843. }
  7844. tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
  7845. l = xmlDictLookup(ctxt->dict, tmp, -1);
  7846. if (tmp != NULL) xmlFree(tmp);
  7847. *prefix = p;
  7848. return(l);
  7849. }
  7850. *prefix = p;
  7851. } else
  7852. *prefix = NULL;
  7853. return(l);
  7854. }
  7855. /**
  7856. * xmlParseQNameAndCompare:
  7857. * @ctxt: an XML parser context
  7858. * @name: the localname
  7859. * @prefix: the prefix, if any.
  7860. *
  7861. * parse an XML name and compares for match
  7862. * (specialized for endtag parsing)
  7863. *
  7864. * Returns NULL for an illegal name, (xmlChar*) 1 for success
  7865. * and the name for mismatch
  7866. */
  7867. static const xmlChar *
  7868. xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
  7869. xmlChar const *prefix) {
  7870. const xmlChar *cmp;
  7871. const xmlChar *in;
  7872. const xmlChar *ret;
  7873. const xmlChar *prefix2;
  7874. if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
  7875. GROW;
  7876. in = ctxt->input->cur;
  7877. cmp = prefix;
  7878. while (*in != 0 && *in == *cmp) {
  7879. ++in;
  7880. ++cmp;
  7881. }
  7882. if ((*cmp == 0) && (*in == ':')) {
  7883. in++;
  7884. cmp = name;
  7885. while (*in != 0 && *in == *cmp) {
  7886. ++in;
  7887. ++cmp;
  7888. }
  7889. if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
  7890. /* success */
  7891. ctxt->input->cur = in;
  7892. return((const xmlChar*) 1);
  7893. }
  7894. }
  7895. /*
  7896. * all strings coms from the dictionary, equality can be done directly
  7897. */
  7898. ret = xmlParseQName (ctxt, &prefix2);
  7899. if ((ret == name) && (prefix == prefix2))
  7900. return((const xmlChar*) 1);
  7901. return ret;
  7902. }
  7903. /**
  7904. * xmlParseAttValueInternal:
  7905. * @ctxt: an XML parser context
  7906. * @len: attribute len result
  7907. * @alloc: whether the attribute was reallocated as a new string
  7908. * @normalize: if 1 then further non-CDATA normalization must be done
  7909. *
  7910. * parse a value for an attribute.
  7911. * NOTE: if no normalization is needed, the routine will return pointers
  7912. * directly from the data buffer.
  7913. *
  7914. * 3.3.3 Attribute-Value Normalization:
  7915. * Before the value of an attribute is passed to the application or
  7916. * checked for validity, the XML processor must normalize it as follows:
  7917. * - a character reference is processed by appending the referenced
  7918. * character to the attribute value
  7919. * - an entity reference is processed by recursively processing the
  7920. * replacement text of the entity
  7921. * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
  7922. * appending #x20 to the normalized value, except that only a single
  7923. * #x20 is appended for a "#xD#xA" sequence that is part of an external
  7924. * parsed entity or the literal entity value of an internal parsed entity
  7925. * - other characters are processed by appending them to the normalized value
  7926. * If the declared value is not CDATA, then the XML processor must further
  7927. * process the normalized attribute value by discarding any leading and
  7928. * trailing space (#x20) characters, and by replacing sequences of space
  7929. * (#x20) characters by a single space (#x20) character.
  7930. * All attributes for which no declaration has been read should be treated
  7931. * by a non-validating parser as if declared CDATA.
  7932. *
  7933. * Returns the AttValue parsed or NULL. The value has to be freed by the
  7934. * caller if it was copied, this can be detected by val[*len] == 0.
  7935. */
  7936. static xmlChar *
  7937. xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
  7938. int normalize)
  7939. {
  7940. xmlChar limit = 0;
  7941. const xmlChar *in = NULL, *start, *end, *last;
  7942. xmlChar *ret = NULL;
  7943. GROW;
  7944. in = (xmlChar *) CUR_PTR;
  7945. if (*in != '"' && *in != '\'') {
  7946. xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
  7947. return (NULL);
  7948. }
  7949. ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
  7950. /*
  7951. * try to handle in this routine the most common case where no
  7952. * allocation of a new string is required and where content is
  7953. * pure ASCII.
  7954. */
  7955. limit = *in++;
  7956. end = ctxt->input->end;
  7957. start = in;
  7958. if (in >= end) {
  7959. const xmlChar *oldbase = ctxt->input->base;
  7960. GROW;
  7961. if (oldbase != ctxt->input->base) {
  7962. long delta = ctxt->input->base - oldbase;
  7963. start = start + delta;
  7964. in = in + delta;
  7965. }
  7966. end = ctxt->input->end;
  7967. }
  7968. if (normalize) {
  7969. /*
  7970. * Skip any leading spaces
  7971. */
  7972. while ((in < end) && (*in != limit) &&
  7973. ((*in == 0x20) || (*in == 0x9) ||
  7974. (*in == 0xA) || (*in == 0xD))) {
  7975. in++;
  7976. start = in;
  7977. if (in >= end) {
  7978. const xmlChar *oldbase = ctxt->input->base;
  7979. GROW;
  7980. if (oldbase != ctxt->input->base) {
  7981. long delta = ctxt->input->base - oldbase;
  7982. start = start + delta;
  7983. in = in + delta;
  7984. }
  7985. end = ctxt->input->end;
  7986. }
  7987. }
  7988. while ((in < end) && (*in != limit) && (*in >= 0x20) &&
  7989. (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
  7990. if ((*in++ == 0x20) && (*in == 0x20)) break;
  7991. if (in >= end) {
  7992. const xmlChar *oldbase = ctxt->input->base;
  7993. GROW;
  7994. if (oldbase != ctxt->input->base) {
  7995. long delta = ctxt->input->base - oldbase;
  7996. start = start + delta;
  7997. in = in + delta;
  7998. }
  7999. end = ctxt->input->end;
  8000. }
  8001. }
  8002. last = in;
  8003. /*
  8004. * skip the trailing blanks
  8005. */
  8006. while ((last[-1] == 0x20) && (last > start)) last--;
  8007. while ((in < end) && (*in != limit) &&
  8008. ((*in == 0x20) || (*in == 0x9) ||
  8009. (*in == 0xA) || (*in == 0xD))) {
  8010. in++;
  8011. if (in >= end) {
  8012. const xmlChar *oldbase = ctxt->input->base;
  8013. GROW;
  8014. if (oldbase != ctxt->input->base) {
  8015. long delta = ctxt->input->base - oldbase;
  8016. start = start + delta;
  8017. in = in + delta;
  8018. last = last + delta;
  8019. }
  8020. end = ctxt->input->end;
  8021. }
  8022. }
  8023. if (*in != limit) goto need_complex;
  8024. } else {
  8025. while ((in < end) && (*in != limit) && (*in >= 0x20) &&
  8026. (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
  8027. in++;
  8028. if (in >= end) {
  8029. const xmlChar *oldbase = ctxt->input->base;
  8030. GROW;
  8031. if (oldbase != ctxt->input->base) {
  8032. long delta = ctxt->input->base - oldbase;
  8033. start = start + delta;
  8034. in = in + delta;
  8035. }
  8036. end = ctxt->input->end;
  8037. }
  8038. }
  8039. last = in;
  8040. if (*in != limit) goto need_complex;
  8041. }
  8042. in++;
  8043. if (len != NULL) {
  8044. *len = last - start;
  8045. ret = (xmlChar *) start;
  8046. } else {
  8047. if (alloc) *alloc = 1;
  8048. ret = xmlStrndup(start, last - start);
  8049. }
  8050. CUR_PTR = in;
  8051. if (alloc) *alloc = 0;
  8052. return ret;
  8053. need_complex:
  8054. if (alloc) *alloc = 1;
  8055. return xmlParseAttValueComplex(ctxt, len, normalize);
  8056. }
  8057. /**
  8058. * xmlParseAttribute2:
  8059. * @ctxt: an XML parser context
  8060. * @pref: the element prefix
  8061. * @elem: the element name
  8062. * @prefix: a xmlChar ** used to store the value of the attribute prefix
  8063. * @value: a xmlChar ** used to store the value of the attribute
  8064. * @len: an int * to save the length of the attribute
  8065. * @alloc: an int * to indicate if the attribute was allocated
  8066. *
  8067. * parse an attribute in the new SAX2 framework.
  8068. *
  8069. * Returns the attribute name, and the value in *value, .
  8070. */
  8071. static const xmlChar *
  8072. xmlParseAttribute2(xmlParserCtxtPtr ctxt,
  8073. const xmlChar * pref, const xmlChar * elem,
  8074. const xmlChar ** prefix, xmlChar ** value,
  8075. int *len, int *alloc)
  8076. {
  8077. const xmlChar *name;
  8078. xmlChar *val, *internal_val = NULL;
  8079. int normalize = 0;
  8080. *value = NULL;
  8081. GROW;
  8082. name = xmlParseQName(ctxt, prefix);
  8083. if (name == NULL) {
  8084. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  8085. "error parsing attribute name\n");
  8086. return (NULL);
  8087. }
  8088. /*
  8089. * get the type if needed
  8090. */
  8091. if (ctxt->attsSpecial != NULL) {
  8092. int type;
  8093. type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
  8094. pref, elem, *prefix, name);
  8095. if (type != 0)
  8096. normalize = 1;
  8097. }
  8098. /*
  8099. * read the value
  8100. */
  8101. SKIP_BLANKS;
  8102. if (RAW == '=') {
  8103. NEXT;
  8104. SKIP_BLANKS;
  8105. val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
  8106. if (normalize) {
  8107. /*
  8108. * Sometimes a second normalisation pass for spaces is needed
  8109. * but that only happens if charrefs or entities refernces
  8110. * have been used in the attribute value, i.e. the attribute
  8111. * value have been extracted in an allocated string already.
  8112. */
  8113. if (*alloc) {
  8114. const xmlChar *val2;
  8115. val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
  8116. if ((val2 != NULL) && (val2 != val)) {
  8117. xmlFree(val);
  8118. val = (xmlChar *) val2;
  8119. }
  8120. }
  8121. }
  8122. ctxt->instate = XML_PARSER_CONTENT;
  8123. } else {
  8124. xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
  8125. "Specification mandate value for attribute %s\n",
  8126. name);
  8127. return (NULL);
  8128. }
  8129. if (*prefix == ctxt->str_xml) {
  8130. /*
  8131. * Check that xml:lang conforms to the specification
  8132. * No more registered as an error, just generate a warning now
  8133. * since this was deprecated in XML second edition
  8134. */
  8135. if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
  8136. internal_val = xmlStrndup(val, *len);
  8137. if (!xmlCheckLanguageID(internal_val)) {
  8138. xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
  8139. "Malformed value for xml:lang : %s\n",
  8140. internal_val, NULL);
  8141. }
  8142. }
  8143. /*
  8144. * Check that xml:space conforms to the specification
  8145. */
  8146. if (xmlStrEqual(name, BAD_CAST "space")) {
  8147. internal_val = xmlStrndup(val, *len);
  8148. if (xmlStrEqual(internal_val, BAD_CAST "default"))
  8149. *(ctxt->space) = 0;
  8150. else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
  8151. *(ctxt->space) = 1;
  8152. else {
  8153. xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
  8154. "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
  8155. internal_val, NULL);
  8156. }
  8157. }
  8158. if (internal_val) {
  8159. xmlFree(internal_val);
  8160. }
  8161. }
  8162. *value = val;
  8163. return (name);
  8164. }
  8165. /**
  8166. * xmlParseStartTag2:
  8167. * @ctxt: an XML parser context
  8168. *
  8169. * parse a start of tag either for rule element or
  8170. * EmptyElement. In both case we don't parse the tag closing chars.
  8171. * This routine is called when running SAX2 parsing
  8172. *
  8173. * [40] STag ::= '<' Name (S Attribute)* S? '>'
  8174. *
  8175. * [ WFC: Unique Att Spec ]
  8176. * No attribute name may appear more than once in the same start-tag or
  8177. * empty-element tag.
  8178. *
  8179. * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
  8180. *
  8181. * [ WFC: Unique Att Spec ]
  8182. * No attribute name may appear more than once in the same start-tag or
  8183. * empty-element tag.
  8184. *
  8185. * With namespace:
  8186. *
  8187. * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
  8188. *
  8189. * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
  8190. *
  8191. * Returns the element name parsed
  8192. */
  8193. static const xmlChar *
  8194. xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
  8195. const xmlChar **URI, int *tlen) {
  8196. const xmlChar *localname;
  8197. const xmlChar *prefix;
  8198. const xmlChar *attname;
  8199. const xmlChar *aprefix;
  8200. const xmlChar *nsname;
  8201. xmlChar *attvalue;
  8202. const xmlChar **atts = ctxt->atts;
  8203. int maxatts = ctxt->maxatts;
  8204. int nratts, nbatts, nbdef;
  8205. int i, j, nbNs, attval, oldline, oldcol;
  8206. const xmlChar *base;
  8207. unsigned long cur;
  8208. int nsNr = ctxt->nsNr;
  8209. if (RAW != '<') return(NULL);
  8210. NEXT1;
  8211. /*
  8212. * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
  8213. * point since the attribute values may be stored as pointers to
  8214. * the buffer and calling SHRINK would destroy them !
  8215. * The Shrinking is only possible once the full set of attribute
  8216. * callbacks have been done.
  8217. */
  8218. reparse:
  8219. SHRINK;
  8220. base = ctxt->input->base;
  8221. cur = ctxt->input->cur - ctxt->input->base;
  8222. oldline = ctxt->input->line;
  8223. oldcol = ctxt->input->col;
  8224. nbatts = 0;
  8225. nratts = 0;
  8226. nbdef = 0;
  8227. nbNs = 0;
  8228. attval = 0;
  8229. /* Forget any namespaces added during an earlier parse of this element. */
  8230. ctxt->nsNr = nsNr;
  8231. localname = xmlParseQName(ctxt, &prefix);
  8232. if (localname == NULL) {
  8233. xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
  8234. "StartTag: invalid element name\n");
  8235. return(NULL);
  8236. }
  8237. *tlen = ctxt->input->cur - ctxt->input->base - cur;
  8238. /*
  8239. * Now parse the attributes, it ends up with the ending
  8240. *
  8241. * (S Attribute)* S?
  8242. */
  8243. SKIP_BLANKS;
  8244. GROW;
  8245. if (ctxt->input->base != base) goto base_changed;
  8246. while ((RAW != '>') &&
  8247. ((RAW != '/') || (NXT(1) != '>')) &&
  8248. (IS_BYTE_CHAR(RAW))) {
  8249. const xmlChar *q = CUR_PTR;
  8250. unsigned int cons = ctxt->input->consumed;
  8251. int len = -1, alloc = 0;
  8252. attname = xmlParseAttribute2(ctxt, prefix, localname,
  8253. &aprefix, &attvalue, &len, &alloc);
  8254. if (ctxt->input->base != base) {
  8255. if ((attvalue != NULL) && (alloc != 0))
  8256. xmlFree(attvalue);
  8257. attvalue = NULL;
  8258. goto base_changed;
  8259. }
  8260. if ((attname != NULL) && (attvalue != NULL)) {
  8261. if (len < 0) len = xmlStrlen(attvalue);
  8262. if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
  8263. const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
  8264. xmlURIPtr uri;
  8265. if (*URL != 0) {
  8266. uri = xmlParseURI((const char *) URL);
  8267. if (uri == NULL) {
  8268. xmlNsErr(ctxt, XML_WAR_NS_URI,
  8269. "xmlns: '%s' is not a valid URI\n",
  8270. URL, NULL, NULL);
  8271. } else {
  8272. if (uri->scheme == NULL) {
  8273. xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
  8274. "xmlns: URI %s is not absolute\n",
  8275. URL, NULL, NULL);
  8276. }
  8277. xmlFreeURI(uri);
  8278. }
  8279. if (URL == ctxt->str_xml_ns) {
  8280. if (attname != ctxt->str_xml) {
  8281. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8282. "xml namespace URI cannot be the default namespace\n",
  8283. NULL, NULL, NULL);
  8284. }
  8285. goto skip_default_ns;
  8286. }
  8287. if ((len == 29) &&
  8288. (xmlStrEqual(URL,
  8289. BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
  8290. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8291. "reuse of the xmlns namespace name is forbidden\n",
  8292. NULL, NULL, NULL);
  8293. goto skip_default_ns;
  8294. }
  8295. }
  8296. /*
  8297. * check that it's not a defined namespace
  8298. */
  8299. for (j = 1;j <= nbNs;j++)
  8300. if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
  8301. break;
  8302. if (j <= nbNs)
  8303. xmlErrAttributeDup(ctxt, NULL, attname);
  8304. else
  8305. if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
  8306. skip_default_ns:
  8307. if (alloc != 0) xmlFree(attvalue);
  8308. SKIP_BLANKS;
  8309. continue;
  8310. }
  8311. if (aprefix == ctxt->str_xmlns) {
  8312. const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
  8313. xmlURIPtr uri;
  8314. if (attname == ctxt->str_xml) {
  8315. if (URL != ctxt->str_xml_ns) {
  8316. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8317. "xml namespace prefix mapped to wrong URI\n",
  8318. NULL, NULL, NULL);
  8319. }
  8320. /*
  8321. * Do not keep a namespace definition node
  8322. */
  8323. goto skip_ns;
  8324. }
  8325. if (URL == ctxt->str_xml_ns) {
  8326. if (attname != ctxt->str_xml) {
  8327. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8328. "xml namespace URI mapped to wrong prefix\n",
  8329. NULL, NULL, NULL);
  8330. }
  8331. goto skip_ns;
  8332. }
  8333. if (attname == ctxt->str_xmlns) {
  8334. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8335. "redefinition of the xmlns prefix is forbidden\n",
  8336. NULL, NULL, NULL);
  8337. goto skip_ns;
  8338. }
  8339. if ((len == 29) &&
  8340. (xmlStrEqual(URL,
  8341. BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
  8342. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8343. "reuse of the xmlns namespace name is forbidden\n",
  8344. NULL, NULL, NULL);
  8345. goto skip_ns;
  8346. }
  8347. if ((URL == NULL) || (URL[0] == 0)) {
  8348. xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
  8349. "xmlns:%s: Empty XML namespace is not allowed\n",
  8350. attname, NULL, NULL);
  8351. goto skip_ns;
  8352. } else {
  8353. uri = xmlParseURI((const char *) URL);
  8354. if (uri == NULL) {
  8355. xmlNsErr(ctxt, XML_WAR_NS_URI,
  8356. "xmlns:%s: '%s' is not a valid URI\n",
  8357. attname, URL, NULL);
  8358. } else {
  8359. if ((ctxt->pedantic) && (uri->scheme == NULL)) {
  8360. xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
  8361. "xmlns:%s: URI %s is not absolute\n",
  8362. attname, URL, NULL);
  8363. }
  8364. xmlFreeURI(uri);
  8365. }
  8366. }
  8367. /*
  8368. * check that it's not a defined namespace
  8369. */
  8370. for (j = 1;j <= nbNs;j++)
  8371. if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
  8372. break;
  8373. if (j <= nbNs)
  8374. xmlErrAttributeDup(ctxt, aprefix, attname);
  8375. else
  8376. if (nsPush(ctxt, attname, URL) > 0) nbNs++;
  8377. skip_ns:
  8378. if (alloc != 0) xmlFree(attvalue);
  8379. SKIP_BLANKS;
  8380. if (ctxt->input->base != base) goto base_changed;
  8381. continue;
  8382. }
  8383. /*
  8384. * Add the pair to atts
  8385. */
  8386. if ((atts == NULL) || (nbatts + 5 > maxatts)) {
  8387. if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
  8388. if (attvalue[len] == 0)
  8389. xmlFree(attvalue);
  8390. goto failed;
  8391. }
  8392. maxatts = ctxt->maxatts;
  8393. atts = ctxt->atts;
  8394. }
  8395. ctxt->attallocs[nratts++] = alloc;
  8396. atts[nbatts++] = attname;
  8397. atts[nbatts++] = aprefix;
  8398. atts[nbatts++] = NULL; /* the URI will be fetched later */
  8399. atts[nbatts++] = attvalue;
  8400. attvalue += len;
  8401. atts[nbatts++] = attvalue;
  8402. /*
  8403. * tag if some deallocation is needed
  8404. */
  8405. if (alloc != 0) attval = 1;
  8406. } else {
  8407. if ((attvalue != NULL) && (attvalue[len] == 0))
  8408. xmlFree(attvalue);
  8409. }
  8410. failed:
  8411. GROW
  8412. if (ctxt->input->base != base) goto base_changed;
  8413. if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
  8414. break;
  8415. if (!IS_BLANK_CH(RAW)) {
  8416. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  8417. "attributes construct error\n");
  8418. break;
  8419. }
  8420. SKIP_BLANKS;
  8421. if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
  8422. (attname == NULL) && (attvalue == NULL)) {
  8423. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  8424. "xmlParseStartTag: problem parsing attributes\n");
  8425. break;
  8426. }
  8427. GROW;
  8428. if (ctxt->input->base != base) goto base_changed;
  8429. }
  8430. /*
  8431. * The attributes defaulting
  8432. */
  8433. if (ctxt->attsDefault != NULL) {
  8434. xmlDefAttrsPtr defaults;
  8435. defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
  8436. if (defaults != NULL) {
  8437. for (i = 0;i < defaults->nbAttrs;i++) {
  8438. attname = defaults->values[5 * i];
  8439. aprefix = defaults->values[5 * i + 1];
  8440. /*
  8441. * special work for namespaces defaulted defs
  8442. */
  8443. if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
  8444. /*
  8445. * check that it's not a defined namespace
  8446. */
  8447. for (j = 1;j <= nbNs;j++)
  8448. if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
  8449. break;
  8450. if (j <= nbNs) continue;
  8451. nsname = xmlGetNamespace(ctxt, NULL);
  8452. if (nsname != defaults->values[5 * i + 2]) {
  8453. if (nsPush(ctxt, NULL,
  8454. defaults->values[5 * i + 2]) > 0)
  8455. nbNs++;
  8456. }
  8457. } else if (aprefix == ctxt->str_xmlns) {
  8458. /*
  8459. * check that it's not a defined namespace
  8460. */
  8461. for (j = 1;j <= nbNs;j++)
  8462. if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
  8463. break;
  8464. if (j <= nbNs) continue;
  8465. nsname = xmlGetNamespace(ctxt, attname);
  8466. if (nsname != defaults->values[2]) {
  8467. if (nsPush(ctxt, attname,
  8468. defaults->values[5 * i + 2]) > 0)
  8469. nbNs++;
  8470. }
  8471. } else {
  8472. /*
  8473. * check that it's not a defined attribute
  8474. */
  8475. for (j = 0;j < nbatts;j+=5) {
  8476. if ((attname == atts[j]) && (aprefix == atts[j+1]))
  8477. break;
  8478. }
  8479. if (j < nbatts) continue;
  8480. if ((atts == NULL) || (nbatts + 5 > maxatts)) {
  8481. if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
  8482. return(NULL);
  8483. }
  8484. maxatts = ctxt->maxatts;
  8485. atts = ctxt->atts;
  8486. }
  8487. atts[nbatts++] = attname;
  8488. atts[nbatts++] = aprefix;
  8489. if (aprefix == NULL)
  8490. atts[nbatts++] = NULL;
  8491. else
  8492. atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
  8493. atts[nbatts++] = defaults->values[5 * i + 2];
  8494. atts[nbatts++] = defaults->values[5 * i + 3];
  8495. if ((ctxt->standalone == 1) &&
  8496. (defaults->values[5 * i + 4] != NULL)) {
  8497. xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
  8498. "standalone: attribute %s on %s defaulted from external subset\n",
  8499. attname, localname);
  8500. }
  8501. nbdef++;
  8502. }
  8503. }
  8504. }
  8505. }
  8506. /*
  8507. * The attributes checkings
  8508. */
  8509. for (i = 0; i < nbatts;i += 5) {
  8510. /*
  8511. * The default namespace does not apply to attribute names.
  8512. */
  8513. if (atts[i + 1] != NULL) {
  8514. nsname = xmlGetNamespace(ctxt, atts[i + 1]);
  8515. if (nsname == NULL) {
  8516. xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
  8517. "Namespace prefix %s for %s on %s is not defined\n",
  8518. atts[i + 1], atts[i], localname);
  8519. }
  8520. atts[i + 2] = nsname;
  8521. } else
  8522. nsname = NULL;
  8523. /*
  8524. * [ WFC: Unique Att Spec ]
  8525. * No attribute name may appear more than once in the same
  8526. * start-tag or empty-element tag.
  8527. * As extended by the Namespace in XML REC.
  8528. */
  8529. for (j = 0; j < i;j += 5) {
  8530. if (atts[i] == atts[j]) {
  8531. if (atts[i+1] == atts[j+1]) {
  8532. xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
  8533. break;
  8534. }
  8535. if ((nsname != NULL) && (atts[j + 2] == nsname)) {
  8536. xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
  8537. "Namespaced Attribute %s in '%s' redefined\n",
  8538. atts[i], nsname, NULL);
  8539. break;
  8540. }
  8541. }
  8542. }
  8543. }
  8544. nsname = xmlGetNamespace(ctxt, prefix);
  8545. if ((prefix != NULL) && (nsname == NULL)) {
  8546. xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
  8547. "Namespace prefix %s on %s is not defined\n",
  8548. prefix, localname, NULL);
  8549. }
  8550. *pref = prefix;
  8551. *URI = nsname;
  8552. /*
  8553. * SAX: Start of Element !
  8554. */
  8555. if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
  8556. (!ctxt->disableSAX)) {
  8557. if (nbNs > 0)
  8558. ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
  8559. nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
  8560. nbatts / 5, nbdef, atts);
  8561. else
  8562. ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
  8563. nsname, 0, NULL, nbatts / 5, nbdef, atts);
  8564. }
  8565. /*
  8566. * Free up attribute allocated strings if needed
  8567. */
  8568. if (attval != 0) {
  8569. for (i = 3,j = 0; j < nratts;i += 5,j++)
  8570. if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
  8571. xmlFree((xmlChar *) atts[i]);
  8572. }
  8573. return(localname);
  8574. base_changed:
  8575. /*
  8576. * the attribute strings are valid iif the base didn't changed
  8577. */
  8578. if (attval != 0) {
  8579. for (i = 3,j = 0; j < nratts;i += 5,j++)
  8580. if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
  8581. xmlFree((xmlChar *) atts[i]);
  8582. }
  8583. ctxt->input->cur = ctxt->input->base + cur;
  8584. ctxt->input->line = oldline;
  8585. ctxt->input->col = oldcol;
  8586. if (ctxt->wellFormed == 1) {
  8587. goto reparse;
  8588. }
  8589. return(NULL);
  8590. }
  8591. /**
  8592. * xmlParseEndTag2:
  8593. * @ctxt: an XML parser context
  8594. * @line: line of the start tag
  8595. * @nsNr: number of namespaces on the start tag
  8596. *
  8597. * parse an end of tag
  8598. *
  8599. * [42] ETag ::= '</' Name S? '>'
  8600. *
  8601. * With namespace
  8602. *
  8603. * [NS 9] ETag ::= '</' QName S? '>'
  8604. */
  8605. static void
  8606. xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
  8607. const xmlChar *URI, int line, int nsNr, int tlen) {
  8608. const xmlChar *name;
  8609. GROW;
  8610. if ((RAW != '<') || (NXT(1) != '/')) {
  8611. xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
  8612. return;
  8613. }
  8614. SKIP(2);
  8615. if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
  8616. if (ctxt->input->cur[tlen] == '>') {
  8617. ctxt->input->cur += tlen + 1;
  8618. goto done;
  8619. }
  8620. ctxt->input->cur += tlen;
  8621. name = (xmlChar*)1;
  8622. } else {
  8623. if (prefix == NULL)
  8624. name = xmlParseNameAndCompare(ctxt, ctxt->name);
  8625. else
  8626. name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
  8627. }
  8628. /*
  8629. * We should definitely be at the ending "S? '>'" part
  8630. */
  8631. GROW;
  8632. SKIP_BLANKS;
  8633. if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
  8634. xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
  8635. } else
  8636. NEXT1;
  8637. /*
  8638. * [ WFC: Element Type Match ]
  8639. * The Name in an element's end-tag must match the element type in the
  8640. * start-tag.
  8641. *
  8642. */
  8643. if (name != (xmlChar*)1) {
  8644. if (name == NULL) name = BAD_CAST "unparseable";
  8645. if ((line == 0) && (ctxt->node != NULL))
  8646. line = ctxt->node->line;
  8647. xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
  8648. "Opening and ending tag mismatch: %s line %d and %s\n",
  8649. ctxt->name, line, name);
  8650. }
  8651. /*
  8652. * SAX: End of Tag
  8653. */
  8654. done:
  8655. if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
  8656. (!ctxt->disableSAX))
  8657. ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
  8658. spacePop(ctxt);
  8659. if (nsNr != 0)
  8660. nsPop(ctxt, nsNr);
  8661. return;
  8662. }
  8663. /**
  8664. * xmlParseCDSect:
  8665. * @ctxt: an XML parser context
  8666. *
  8667. * Parse escaped pure raw content.
  8668. *
  8669. * [18] CDSect ::= CDStart CData CDEnd
  8670. *
  8671. * [19] CDStart ::= '<![CDATA['
  8672. *
  8673. * [20] Data ::= (Char* - (Char* ']]>' Char*))
  8674. *
  8675. * [21] CDEnd ::= ']]>'
  8676. */
  8677. void
  8678. xmlParseCDSect(xmlParserCtxtPtr ctxt) {
  8679. xmlChar *buf = NULL;
  8680. int len = 0;
  8681. int size = XML_PARSER_BUFFER_SIZE;
  8682. int r, rl;
  8683. int s, sl;
  8684. int cur, l;
  8685. int count = 0;
  8686. /* Check 2.6.0 was NXT(0) not RAW */
  8687. if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
  8688. SKIP(9);
  8689. } else
  8690. return;
  8691. ctxt->instate = XML_PARSER_CDATA_SECTION;
  8692. r = CUR_CHAR(rl);
  8693. if (!IS_CHAR(r)) {
  8694. xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
  8695. ctxt->instate = XML_PARSER_CONTENT;
  8696. return;
  8697. }
  8698. NEXTL(rl);
  8699. s = CUR_CHAR(sl);
  8700. if (!IS_CHAR(s)) {
  8701. xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
  8702. ctxt->instate = XML_PARSER_CONTENT;
  8703. return;
  8704. }
  8705. NEXTL(sl);
  8706. cur = CUR_CHAR(l);
  8707. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  8708. if (buf == NULL) {
  8709. xmlErrMemory(ctxt, NULL);
  8710. return;
  8711. }
  8712. while (IS_CHAR(cur) &&
  8713. ((r != ']') || (s != ']') || (cur != '>'))) {
  8714. if (len + 5 >= size) {
  8715. xmlChar *tmp;
  8716. size *= 2;
  8717. tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
  8718. if (tmp == NULL) {
  8719. xmlFree(buf);
  8720. xmlErrMemory(ctxt, NULL);
  8721. return;
  8722. }
  8723. buf = tmp;
  8724. }
  8725. COPY_BUF(rl,buf,len,r);
  8726. r = s;
  8727. rl = sl;
  8728. s = cur;
  8729. sl = l;
  8730. count++;
  8731. if (count > 50) {
  8732. GROW;
  8733. count = 0;
  8734. }
  8735. NEXTL(l);
  8736. cur = CUR_CHAR(l);
  8737. }
  8738. buf[len] = 0;
  8739. ctxt->instate = XML_PARSER_CONTENT;
  8740. if (cur != '>') {
  8741. xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
  8742. "CData section not finished\n%.50s\n", buf);
  8743. xmlFree(buf);
  8744. return;
  8745. }
  8746. NEXTL(l);
  8747. /*
  8748. * OK the buffer is to be consumed as cdata.
  8749. */
  8750. if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
  8751. if (ctxt->sax->cdataBlock != NULL)
  8752. ctxt->sax->cdataBlock(ctxt->userData, buf, len);
  8753. else if (ctxt->sax->characters != NULL)
  8754. ctxt->sax->characters(ctxt->userData, buf, len);
  8755. }
  8756. xmlFree(buf);
  8757. }
  8758. /**
  8759. * xmlParseContent:
  8760. * @ctxt: an XML parser context
  8761. *
  8762. * Parse a content:
  8763. *
  8764. * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
  8765. */
  8766. void
  8767. xmlParseContent(xmlParserCtxtPtr ctxt) {
  8768. GROW;
  8769. while ((RAW != 0) &&
  8770. ((RAW != '<') || (NXT(1) != '/')) &&
  8771. (ctxt->instate != XML_PARSER_EOF)) {
  8772. const xmlChar *test = CUR_PTR;
  8773. unsigned int cons = ctxt->input->consumed;
  8774. const xmlChar *cur = ctxt->input->cur;
  8775. /*
  8776. * First case : a Processing Instruction.
  8777. */
  8778. if ((*cur == '<') && (cur[1] == '?')) {
  8779. xmlParsePI(ctxt);
  8780. }
  8781. /*
  8782. * Second case : a CDSection
  8783. */
  8784. /* 2.6.0 test was *cur not RAW */
  8785. else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
  8786. xmlParseCDSect(ctxt);
  8787. }
  8788. /*
  8789. * Third case : a comment
  8790. */
  8791. else if ((*cur == '<') && (NXT(1) == '!') &&
  8792. (NXT(2) == '-') && (NXT(3) == '-')) {
  8793. xmlParseComment(ctxt);
  8794. ctxt->instate = XML_PARSER_CONTENT;
  8795. }
  8796. /*
  8797. * Fourth case : a sub-element.
  8798. */
  8799. else if (*cur == '<') {
  8800. xmlParseElement(ctxt);
  8801. }
  8802. /*
  8803. * Fifth case : a reference. If if has not been resolved,
  8804. * parsing returns it's Name, create the node
  8805. */
  8806. else if (*cur == '&') {
  8807. xmlParseReference(ctxt);
  8808. }
  8809. /*
  8810. * Last case, text. Note that References are handled directly.
  8811. */
  8812. else {
  8813. xmlParseCharData(ctxt, 0);
  8814. }
  8815. GROW;
  8816. /*
  8817. * Pop-up of finished entities.
  8818. */
  8819. while ((RAW == 0) && (ctxt->inputNr > 1))
  8820. xmlPopInput(ctxt);
  8821. SHRINK;
  8822. if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
  8823. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  8824. "detected an error in element content\n");
  8825. ctxt->instate = XML_PARSER_EOF;
  8826. break;
  8827. }
  8828. }
  8829. }
  8830. /**
  8831. * xmlParseElement:
  8832. * @ctxt: an XML parser context
  8833. *
  8834. * parse an XML element, this is highly recursive
  8835. *
  8836. * [39] element ::= EmptyElemTag | STag content ETag
  8837. *
  8838. * [ WFC: Element Type Match ]
  8839. * The Name in an element's end-tag must match the element type in the
  8840. * start-tag.
  8841. *
  8842. */
  8843. void
  8844. xmlParseElement(xmlParserCtxtPtr ctxt) {
  8845. const xmlChar *name;
  8846. const xmlChar *prefix = NULL;
  8847. const xmlChar *URI = NULL;
  8848. xmlParserNodeInfo node_info;
  8849. int line, tlen;
  8850. xmlNodePtr ret;
  8851. int nsNr = ctxt->nsNr;
  8852. if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
  8853. ((ctxt->options & XML_PARSE_HUGE) == 0)) {
  8854. xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
  8855. "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
  8856. xmlParserMaxDepth);
  8857. ctxt->instate = XML_PARSER_EOF;
  8858. return;
  8859. }
  8860. /* Capture start position */
  8861. if (ctxt->record_info) {
  8862. node_info.begin_pos = ctxt->input->consumed +
  8863. (CUR_PTR - ctxt->input->base);
  8864. node_info.begin_line = ctxt->input->line;
  8865. }
  8866. if (ctxt->spaceNr == 0)
  8867. spacePush(ctxt, -1);
  8868. else if (*ctxt->space == -2)
  8869. spacePush(ctxt, -1);
  8870. else
  8871. spacePush(ctxt, *ctxt->space);
  8872. line = ctxt->input->line;
  8873. #ifdef LIBXML_SAX1_ENABLED
  8874. if (ctxt->sax2)
  8875. #endif /* LIBXML_SAX1_ENABLED */
  8876. name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
  8877. #ifdef LIBXML_SAX1_ENABLED
  8878. else
  8879. name = xmlParseStartTag(ctxt);
  8880. #endif /* LIBXML_SAX1_ENABLED */
  8881. if (name == NULL) {
  8882. spacePop(ctxt);
  8883. return;
  8884. }
  8885. namePush(ctxt, name);
  8886. ret = ctxt->node;
  8887. #ifdef LIBXML_VALID_ENABLED
  8888. /*
  8889. * [ VC: Root Element Type ]
  8890. * The Name in the document type declaration must match the element
  8891. * type of the root element.
  8892. */
  8893. if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
  8894. ctxt->node && (ctxt->node == ctxt->myDoc->children))
  8895. ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
  8896. #endif /* LIBXML_VALID_ENABLED */
  8897. /*
  8898. * Check for an Empty Element.
  8899. */
  8900. if ((RAW == '/') && (NXT(1) == '>')) {
  8901. SKIP(2);
  8902. if (ctxt->sax2) {
  8903. if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
  8904. (!ctxt->disableSAX))
  8905. ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
  8906. #ifdef LIBXML_SAX1_ENABLED
  8907. } else {
  8908. if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
  8909. (!ctxt->disableSAX))
  8910. ctxt->sax->endElement(ctxt->userData, name);
  8911. #endif /* LIBXML_SAX1_ENABLED */
  8912. }
  8913. namePop(ctxt);
  8914. spacePop(ctxt);
  8915. if (nsNr != ctxt->nsNr)
  8916. nsPop(ctxt, ctxt->nsNr - nsNr);
  8917. if ( ret != NULL && ctxt->record_info ) {
  8918. node_info.end_pos = ctxt->input->consumed +
  8919. (CUR_PTR - ctxt->input->base);
  8920. node_info.end_line = ctxt->input->line;
  8921. node_info.node = ret;
  8922. xmlParserAddNodeInfo(ctxt, &node_info);
  8923. }
  8924. return;
  8925. }
  8926. if (RAW == '>') {
  8927. NEXT1;
  8928. } else {
  8929. xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
  8930. "Couldn't find end of Start Tag %s line %d\n",
  8931. name, line, NULL);
  8932. /*
  8933. * end of parsing of this node.
  8934. */
  8935. nodePop(ctxt);
  8936. namePop(ctxt);
  8937. spacePop(ctxt);
  8938. if (nsNr != ctxt->nsNr)
  8939. nsPop(ctxt, ctxt->nsNr - nsNr);
  8940. /*
  8941. * Capture end position and add node
  8942. */
  8943. if ( ret != NULL && ctxt->record_info ) {
  8944. node_info.end_pos = ctxt->input->consumed +
  8945. (CUR_PTR - ctxt->input->base);
  8946. node_info.end_line = ctxt->input->line;
  8947. node_info.node = ret;
  8948. xmlParserAddNodeInfo(ctxt, &node_info);
  8949. }
  8950. return;
  8951. }
  8952. /*
  8953. * Parse the content of the element:
  8954. */
  8955. xmlParseContent(ctxt);
  8956. if (!IS_BYTE_CHAR(RAW)) {
  8957. xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
  8958. "Premature end of data in tag %s line %d\n",
  8959. name, line, NULL);
  8960. /*
  8961. * end of parsing of this node.
  8962. */
  8963. nodePop(ctxt);
  8964. namePop(ctxt);
  8965. spacePop(ctxt);
  8966. if (nsNr != ctxt->nsNr)
  8967. nsPop(ctxt, ctxt->nsNr - nsNr);
  8968. return;
  8969. }
  8970. /*
  8971. * parse the end of tag: '</' should be here.
  8972. */
  8973. if (ctxt->sax2) {
  8974. xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
  8975. namePop(ctxt);
  8976. }
  8977. #ifdef LIBXML_SAX1_ENABLED
  8978. else
  8979. xmlParseEndTag1(ctxt, line);
  8980. #endif /* LIBXML_SAX1_ENABLED */
  8981. /*
  8982. * Capture end position and add node
  8983. */
  8984. if ( ret != NULL && ctxt->record_info ) {
  8985. node_info.end_pos = ctxt->input->consumed +
  8986. (CUR_PTR - ctxt->input->base);
  8987. node_info.end_line = ctxt->input->line;
  8988. node_info.node = ret;
  8989. xmlParserAddNodeInfo(ctxt, &node_info);
  8990. }
  8991. }
  8992. /**
  8993. * xmlParseVersionNum:
  8994. * @ctxt: an XML parser context
  8995. *
  8996. * parse the XML version value.
  8997. *
  8998. * [26] VersionNum ::= '1.' [0-9]+
  8999. *
  9000. * In practice allow [0-9].[0-9]+ at that level
  9001. *
  9002. * Returns the string giving the XML version number, or NULL
  9003. */
  9004. xmlChar *
  9005. xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
  9006. xmlChar *buf = NULL;
  9007. int len = 0;
  9008. int size = 10;
  9009. xmlChar cur;
  9010. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  9011. if (buf == NULL) {
  9012. xmlErrMemory(ctxt, NULL);
  9013. return(NULL);
  9014. }
  9015. cur = CUR;
  9016. if (!((cur >= '0') && (cur <= '9'))) {
  9017. xmlFree(buf);
  9018. return(NULL);
  9019. }
  9020. buf[len++] = cur;
  9021. NEXT;
  9022. cur=CUR;
  9023. if (cur != '.') {
  9024. xmlFree(buf);
  9025. return(NULL);
  9026. }
  9027. buf[len++] = cur;
  9028. NEXT;
  9029. cur=CUR;
  9030. while ((cur >= '0') && (cur <= '9')) {
  9031. if (len + 1 >= size) {
  9032. xmlChar *tmp;
  9033. size *= 2;
  9034. tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
  9035. if (tmp == NULL) {
  9036. xmlFree(buf);
  9037. xmlErrMemory(ctxt, NULL);
  9038. return(NULL);
  9039. }
  9040. buf = tmp;
  9041. }
  9042. buf[len++] = cur;
  9043. NEXT;
  9044. cur=CUR;
  9045. }
  9046. buf[len] = 0;
  9047. return(buf);
  9048. }
  9049. /**
  9050. * xmlParseVersionInfo:
  9051. * @ctxt: an XML parser context
  9052. *
  9053. * parse the XML version.
  9054. *
  9055. * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
  9056. *
  9057. * [25] Eq ::= S? '=' S?
  9058. *
  9059. * Returns the version string, e.g. "1.0"
  9060. */
  9061. xmlChar *
  9062. xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
  9063. xmlChar *version = NULL;
  9064. if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
  9065. SKIP(7);
  9066. SKIP_BLANKS;
  9067. if (RAW != '=') {
  9068. xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
  9069. return(NULL);
  9070. }
  9071. NEXT;
  9072. SKIP_BLANKS;
  9073. if (RAW == '"') {
  9074. NEXT;
  9075. version = xmlParseVersionNum(ctxt);
  9076. if (RAW != '"') {
  9077. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9078. } else
  9079. NEXT;
  9080. } else if (RAW == '\''){
  9081. NEXT;
  9082. version = xmlParseVersionNum(ctxt);
  9083. if (RAW != '\'') {
  9084. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9085. } else
  9086. NEXT;
  9087. } else {
  9088. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
  9089. }
  9090. }
  9091. return(version);
  9092. }
  9093. /**
  9094. * xmlParseEncName:
  9095. * @ctxt: an XML parser context
  9096. *
  9097. * parse the XML encoding name
  9098. *
  9099. * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
  9100. *
  9101. * Returns the encoding name value or NULL
  9102. */
  9103. xmlChar *
  9104. xmlParseEncName(xmlParserCtxtPtr ctxt) {
  9105. xmlChar *buf = NULL;
  9106. int len = 0;
  9107. int size = 10;
  9108. xmlChar cur;
  9109. cur = CUR;
  9110. if (((cur >= 'a') && (cur <= 'z')) ||
  9111. ((cur >= 'A') && (cur <= 'Z'))) {
  9112. buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
  9113. if (buf == NULL) {
  9114. xmlErrMemory(ctxt, NULL);
  9115. return(NULL);
  9116. }
  9117. buf[len++] = cur;
  9118. NEXT;
  9119. cur = CUR;
  9120. while (((cur >= 'a') && (cur <= 'z')) ||
  9121. ((cur >= 'A') && (cur <= 'Z')) ||
  9122. ((cur >= '0') && (cur <= '9')) ||
  9123. (cur == '.') || (cur == '_') ||
  9124. (cur == '-')) {
  9125. if (len + 1 >= size) {
  9126. xmlChar *tmp;
  9127. size *= 2;
  9128. tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
  9129. if (tmp == NULL) {
  9130. xmlErrMemory(ctxt, NULL);
  9131. xmlFree(buf);
  9132. return(NULL);
  9133. }
  9134. buf = tmp;
  9135. }
  9136. buf[len++] = cur;
  9137. NEXT;
  9138. cur = CUR;
  9139. if (cur == 0) {
  9140. SHRINK;
  9141. GROW;
  9142. cur = CUR;
  9143. }
  9144. }
  9145. buf[len] = 0;
  9146. } else {
  9147. xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
  9148. }
  9149. return(buf);
  9150. }
  9151. /**
  9152. * xmlParseEncodingDecl:
  9153. * @ctxt: an XML parser context
  9154. *
  9155. * parse the XML encoding declaration
  9156. *
  9157. * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
  9158. *
  9159. * this setups the conversion filters.
  9160. *
  9161. * Returns the encoding value or NULL
  9162. */
  9163. const xmlChar *
  9164. xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
  9165. xmlChar *encoding = NULL;
  9166. SKIP_BLANKS;
  9167. if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
  9168. SKIP(8);
  9169. SKIP_BLANKS;
  9170. if (RAW != '=') {
  9171. xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
  9172. return(NULL);
  9173. }
  9174. NEXT;
  9175. SKIP_BLANKS;
  9176. if (RAW == '"') {
  9177. NEXT;
  9178. encoding = xmlParseEncName(ctxt);
  9179. if (RAW != '"') {
  9180. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9181. } else
  9182. NEXT;
  9183. } else if (RAW == '\''){
  9184. NEXT;
  9185. encoding = xmlParseEncName(ctxt);
  9186. if (RAW != '\'') {
  9187. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9188. } else
  9189. NEXT;
  9190. } else {
  9191. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
  9192. }
  9193. /*
  9194. * UTF-16 encoding stwich has already taken place at this stage,
  9195. * more over the little-endian/big-endian selection is already done
  9196. */
  9197. if ((encoding != NULL) &&
  9198. ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
  9199. (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
  9200. /*
  9201. * If no encoding was passed to the parser, that we are
  9202. * using UTF-16 and no decoder is present i.e. the
  9203. * document is apparently UTF-8 compatible, then raise an
  9204. * encoding mismatch fatal error
  9205. */
  9206. if ((ctxt->encoding == NULL) &&
  9207. (ctxt->input->buf != NULL) &&
  9208. (ctxt->input->buf->encoder == NULL)) {
  9209. xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
  9210. "Document labelled UTF-16 but has UTF-8 content\n");
  9211. }
  9212. if (ctxt->encoding != NULL)
  9213. xmlFree((xmlChar *) ctxt->encoding);
  9214. ctxt->encoding = encoding;
  9215. }
  9216. /*
  9217. * UTF-8 encoding is handled natively
  9218. */
  9219. else if ((encoding != NULL) &&
  9220. ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
  9221. (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
  9222. if (ctxt->encoding != NULL)
  9223. xmlFree((xmlChar *) ctxt->encoding);
  9224. ctxt->encoding = encoding;
  9225. }
  9226. else if (encoding != NULL) {
  9227. xmlCharEncodingHandlerPtr handler;
  9228. if (ctxt->input->encoding != NULL)
  9229. xmlFree((xmlChar *) ctxt->input->encoding);
  9230. ctxt->input->encoding = encoding;
  9231. handler = xmlFindCharEncodingHandler((const char *) encoding);
  9232. if (handler != NULL) {
  9233. xmlSwitchToEncoding(ctxt, handler);
  9234. } else {
  9235. xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
  9236. "Unsupported encoding %s\n", encoding);
  9237. return(NULL);
  9238. }
  9239. }
  9240. }
  9241. return(encoding);
  9242. }
  9243. /**
  9244. * xmlParseSDDecl:
  9245. * @ctxt: an XML parser context
  9246. *
  9247. * parse the XML standalone declaration
  9248. *
  9249. * [32] SDDecl ::= S 'standalone' Eq
  9250. * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
  9251. *
  9252. * [ VC: Standalone Document Declaration ]
  9253. * TODO The standalone document declaration must have the value "no"
  9254. * if any external markup declarations contain declarations of:
  9255. * - attributes with default values, if elements to which these
  9256. * attributes apply appear in the document without specifications
  9257. * of values for these attributes, or
  9258. * - entities (other than amp, lt, gt, apos, quot), if references
  9259. * to those entities appear in the document, or
  9260. * - attributes with values subject to normalization, where the
  9261. * attribute appears in the document with a value which will change
  9262. * as a result of normalization, or
  9263. * - element types with element content, if white space occurs directly
  9264. * within any instance of those types.
  9265. *
  9266. * Returns:
  9267. * 1 if standalone="yes"
  9268. * 0 if standalone="no"
  9269. * -2 if standalone attribute is missing or invalid
  9270. * (A standalone value of -2 means that the XML declaration was found,
  9271. * but no value was specified for the standalone attribute).
  9272. */
  9273. int
  9274. xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
  9275. int standalone = -2;
  9276. SKIP_BLANKS;
  9277. if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
  9278. SKIP(10);
  9279. SKIP_BLANKS;
  9280. if (RAW != '=') {
  9281. xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
  9282. return(standalone);
  9283. }
  9284. NEXT;
  9285. SKIP_BLANKS;
  9286. if (RAW == '\''){
  9287. NEXT;
  9288. if ((RAW == 'n') && (NXT(1) == 'o')) {
  9289. standalone = 0;
  9290. SKIP(2);
  9291. } else if ((RAW == 'y') && (NXT(1) == 'e') &&
  9292. (NXT(2) == 's')) {
  9293. standalone = 1;
  9294. SKIP(3);
  9295. } else {
  9296. xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
  9297. }
  9298. if (RAW != '\'') {
  9299. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9300. } else
  9301. NEXT;
  9302. } else if (RAW == '"'){
  9303. NEXT;
  9304. if ((RAW == 'n') && (NXT(1) == 'o')) {
  9305. standalone = 0;
  9306. SKIP(2);
  9307. } else if ((RAW == 'y') && (NXT(1) == 'e') &&
  9308. (NXT(2) == 's')) {
  9309. standalone = 1;
  9310. SKIP(3);
  9311. } else {
  9312. xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
  9313. }
  9314. if (RAW != '"') {
  9315. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
  9316. } else
  9317. NEXT;
  9318. } else {
  9319. xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
  9320. }
  9321. }
  9322. return(standalone);
  9323. }
  9324. /**
  9325. * xmlParseXMLDecl:
  9326. * @ctxt: an XML parser context
  9327. *
  9328. * parse an XML declaration header
  9329. *
  9330. * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  9331. */
  9332. void
  9333. xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
  9334. xmlChar *version;
  9335. /*
  9336. * This value for standalone indicates that the document has an
  9337. * XML declaration but it does not have a standalone attribute.
  9338. * It will be overwritten later if a standalone attribute is found.
  9339. */
  9340. ctxt->input->standalone = -2;
  9341. /*
  9342. * We know that '<?xml' is here.
  9343. */
  9344. SKIP(5);
  9345. if (!IS_BLANK_CH(RAW)) {
  9346. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
  9347. "Blank needed after '<?xml'\n");
  9348. }
  9349. SKIP_BLANKS;
  9350. /*
  9351. * We must have the VersionInfo here.
  9352. */
  9353. version = xmlParseVersionInfo(ctxt);
  9354. if (version == NULL) {
  9355. xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
  9356. } else {
  9357. if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
  9358. /*
  9359. * Changed here for XML-1.0 5th edition
  9360. */
  9361. if (ctxt->options & XML_PARSE_OLD10) {
  9362. xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
  9363. "Unsupported version '%s'\n",
  9364. version);
  9365. } else {
  9366. if ((version[0] == '1') && ((version[1] == '.'))) {
  9367. xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
  9368. "Unsupported version '%s'\n",
  9369. version, NULL);
  9370. } else {
  9371. xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
  9372. "Unsupported version '%s'\n",
  9373. version);
  9374. }
  9375. }
  9376. }
  9377. if (ctxt->version != NULL)
  9378. xmlFree((void *) ctxt->version);
  9379. ctxt->version = version;
  9380. }
  9381. /*
  9382. * We may have the encoding declaration
  9383. */
  9384. if (!IS_BLANK_CH(RAW)) {
  9385. if ((RAW == '?') && (NXT(1) == '>')) {
  9386. SKIP(2);
  9387. return;
  9388. }
  9389. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
  9390. }
  9391. xmlParseEncodingDecl(ctxt);
  9392. if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
  9393. /*
  9394. * The XML REC instructs us to stop parsing right here
  9395. */
  9396. return;
  9397. }
  9398. /*
  9399. * We may have the standalone status.
  9400. */
  9401. if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
  9402. if ((RAW == '?') && (NXT(1) == '>')) {
  9403. SKIP(2);
  9404. return;
  9405. }
  9406. xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
  9407. }
  9408. /*
  9409. * We can grow the input buffer freely at that point
  9410. */
  9411. GROW;
  9412. SKIP_BLANKS;
  9413. ctxt->input->standalone = xmlParseSDDecl(ctxt);
  9414. SKIP_BLANKS;
  9415. if ((RAW == '?') && (NXT(1) == '>')) {
  9416. SKIP(2);
  9417. } else if (RAW == '>') {
  9418. /* Deprecated old WD ... */
  9419. xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
  9420. NEXT;
  9421. } else {
  9422. xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
  9423. MOVETO_ENDTAG(CUR_PTR);
  9424. NEXT;
  9425. }
  9426. }
  9427. /**
  9428. * xmlParseMisc:
  9429. * @ctxt: an XML parser context
  9430. *
  9431. * parse an XML Misc* optional field.
  9432. *
  9433. * [27] Misc ::= Comment | PI | S
  9434. */
  9435. void
  9436. xmlParseMisc(xmlParserCtxtPtr ctxt) {
  9437. while (((RAW == '<') && (NXT(1) == '?')) ||
  9438. (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
  9439. IS_BLANK_CH(CUR)) {
  9440. if ((RAW == '<') && (NXT(1) == '?')) {
  9441. xmlParsePI(ctxt);
  9442. } else if (IS_BLANK_CH(CUR)) {
  9443. NEXT;
  9444. } else
  9445. xmlParseComment(ctxt);
  9446. }
  9447. }
  9448. /**
  9449. * xmlParseDocument:
  9450. * @ctxt: an XML parser context
  9451. *
  9452. * parse an XML document (and build a tree if using the standard SAX
  9453. * interface).
  9454. *
  9455. * [1] document ::= prolog element Misc*
  9456. *
  9457. * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
  9458. *
  9459. * Returns 0, -1 in case of error. the parser context is augmented
  9460. * as a result of the parsing.
  9461. */
  9462. int
  9463. xmlParseDocument(xmlParserCtxtPtr ctxt) {
  9464. xmlChar start[4];
  9465. xmlCharEncoding enc;
  9466. xmlInitParser();
  9467. if ((ctxt == NULL) || (ctxt->input == NULL))
  9468. return(-1);
  9469. GROW;
  9470. /*
  9471. * SAX: detecting the level.
  9472. */
  9473. xmlDetectSAX2(ctxt);
  9474. /*
  9475. * SAX: beginning of the document processing.
  9476. */
  9477. if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
  9478. ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
  9479. if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
  9480. ((ctxt->input->end - ctxt->input->cur) >= 4)) {
  9481. /*
  9482. * Get the 4 first bytes and decode the charset
  9483. * if enc != XML_CHAR_ENCODING_NONE
  9484. * plug some encoding conversion routines.
  9485. */
  9486. start[0] = RAW;
  9487. start[1] = NXT(1);
  9488. start[2] = NXT(2);
  9489. start[3] = NXT(3);
  9490. enc = xmlDetectCharEncoding(&start[0], 4);
  9491. if (enc != XML_CHAR_ENCODING_NONE) {
  9492. xmlSwitchEncoding(ctxt, enc);
  9493. }
  9494. }
  9495. if (CUR == 0) {
  9496. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
  9497. }
  9498. /*
  9499. * Check for the XMLDecl in the Prolog.
  9500. * do not GROW here to avoid the detected encoder to decode more
  9501. * than just the first line, unless the amount of data is really
  9502. * too small to hold "<?xml version="1.0" encoding="foo"
  9503. */
  9504. if ((ctxt->input->end - ctxt->input->cur) < 35) {
  9505. GROW;
  9506. }
  9507. if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
  9508. /*
  9509. * Note that we will switch encoding on the fly.
  9510. */
  9511. xmlParseXMLDecl(ctxt);
  9512. if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
  9513. /*
  9514. * The XML REC instructs us to stop parsing right here
  9515. */
  9516. return(-1);
  9517. }
  9518. ctxt->standalone = ctxt->input->standalone;
  9519. SKIP_BLANKS;
  9520. } else {
  9521. ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
  9522. }
  9523. if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
  9524. ctxt->sax->startDocument(ctxt->userData);
  9525. /*
  9526. * The Misc part of the Prolog
  9527. */
  9528. GROW;
  9529. xmlParseMisc(ctxt);
  9530. /*
  9531. * Then possibly doc type declaration(s) and more Misc
  9532. * (doctypedecl Misc*)?
  9533. */
  9534. GROW;
  9535. if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
  9536. ctxt->inSubset = 1;
  9537. xmlParseDocTypeDecl(ctxt);
  9538. if (RAW == '[') {
  9539. ctxt->instate = XML_PARSER_DTD;
  9540. xmlParseInternalSubset(ctxt);
  9541. }
  9542. /*
  9543. * Create and update the external subset.
  9544. */
  9545. ctxt->inSubset = 2;
  9546. if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
  9547. (!ctxt->disableSAX))
  9548. ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
  9549. ctxt->extSubSystem, ctxt->extSubURI);
  9550. ctxt->inSubset = 0;
  9551. xmlCleanSpecialAttr(ctxt);
  9552. ctxt->instate = XML_PARSER_PROLOG;
  9553. xmlParseMisc(ctxt);
  9554. }
  9555. /*
  9556. * Time to start parsing the tree itself
  9557. */
  9558. GROW;
  9559. if (RAW != '<') {
  9560. xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
  9561. "Start tag expected, '<' not found\n");
  9562. } else {
  9563. ctxt->instate = XML_PARSER_CONTENT;
  9564. xmlParseElement(ctxt);
  9565. ctxt->instate = XML_PARSER_EPILOG;
  9566. /*
  9567. * The Misc part at the end
  9568. */
  9569. xmlParseMisc(ctxt);
  9570. if (RAW != 0) {
  9571. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
  9572. }
  9573. ctxt->instate = XML_PARSER_EOF;
  9574. }
  9575. /*
  9576. * SAX: end of the document processing.
  9577. */
  9578. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  9579. ctxt->sax->endDocument(ctxt->userData);
  9580. /*
  9581. * Remove locally kept entity definitions if the tree was not built
  9582. */
  9583. if ((ctxt->myDoc != NULL) &&
  9584. (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
  9585. xmlFreeDoc(ctxt->myDoc);
  9586. ctxt->myDoc = NULL;
  9587. }
  9588. if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
  9589. ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
  9590. if (ctxt->valid)
  9591. ctxt->myDoc->properties |= XML_DOC_DTDVALID;
  9592. if (ctxt->nsWellFormed)
  9593. ctxt->myDoc->properties |= XML_DOC_NSVALID;
  9594. if (ctxt->options & XML_PARSE_OLD10)
  9595. ctxt->myDoc->properties |= XML_DOC_OLD10;
  9596. }
  9597. if (! ctxt->wellFormed) {
  9598. ctxt->valid = 0;
  9599. return(-1);
  9600. }
  9601. return(0);
  9602. }
  9603. /**
  9604. * xmlParseExtParsedEnt:
  9605. * @ctxt: an XML parser context
  9606. *
  9607. * parse a general parsed entity
  9608. * An external general parsed entity is well-formed if it matches the
  9609. * production labeled extParsedEnt.
  9610. *
  9611. * [78] extParsedEnt ::= TextDecl? content
  9612. *
  9613. * Returns 0, -1 in case of error. the parser context is augmented
  9614. * as a result of the parsing.
  9615. */
  9616. int
  9617. xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
  9618. xmlChar start[4];
  9619. xmlCharEncoding enc;
  9620. if ((ctxt == NULL) || (ctxt->input == NULL))
  9621. return(-1);
  9622. xmlDefaultSAXHandlerInit();
  9623. xmlDetectSAX2(ctxt);
  9624. GROW;
  9625. /*
  9626. * SAX: beginning of the document processing.
  9627. */
  9628. if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
  9629. ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
  9630. /*
  9631. * Get the 4 first bytes and decode the charset
  9632. * if enc != XML_CHAR_ENCODING_NONE
  9633. * plug some encoding conversion routines.
  9634. */
  9635. if ((ctxt->input->end - ctxt->input->cur) >= 4) {
  9636. start[0] = RAW;
  9637. start[1] = NXT(1);
  9638. start[2] = NXT(2);
  9639. start[3] = NXT(3);
  9640. enc = xmlDetectCharEncoding(start, 4);
  9641. if (enc != XML_CHAR_ENCODING_NONE) {
  9642. xmlSwitchEncoding(ctxt, enc);
  9643. }
  9644. }
  9645. if (CUR == 0) {
  9646. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
  9647. }
  9648. /*
  9649. * Check for the XMLDecl in the Prolog.
  9650. */
  9651. GROW;
  9652. if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
  9653. /*
  9654. * Note that we will switch encoding on the fly.
  9655. */
  9656. xmlParseXMLDecl(ctxt);
  9657. if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
  9658. /*
  9659. * The XML REC instructs us to stop parsing right here
  9660. */
  9661. return(-1);
  9662. }
  9663. SKIP_BLANKS;
  9664. } else {
  9665. ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
  9666. }
  9667. if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
  9668. ctxt->sax->startDocument(ctxt->userData);
  9669. /*
  9670. * Doing validity checking on chunk doesn't make sense
  9671. */
  9672. ctxt->instate = XML_PARSER_CONTENT;
  9673. ctxt->validate = 0;
  9674. ctxt->loadsubset = 0;
  9675. ctxt->depth = 0;
  9676. xmlParseContent(ctxt);
  9677. if ((RAW == '<') && (NXT(1) == '/')) {
  9678. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  9679. } else if (RAW != 0) {
  9680. xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
  9681. }
  9682. /*
  9683. * SAX: end of the document processing.
  9684. */
  9685. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  9686. ctxt->sax->endDocument(ctxt->userData);
  9687. if (! ctxt->wellFormed) return(-1);
  9688. return(0);
  9689. }
  9690. #ifdef LIBXML_PUSH_ENABLED
  9691. /************************************************************************
  9692. * *
  9693. * Progressive parsing interfaces *
  9694. * *
  9695. ************************************************************************/
  9696. /**
  9697. * xmlParseLookupSequence:
  9698. * @ctxt: an XML parser context
  9699. * @first: the first char to lookup
  9700. * @next: the next char to lookup or zero
  9701. * @third: the next char to lookup or zero
  9702. *
  9703. * Try to find if a sequence (first, next, third) or just (first next) or
  9704. * (first) is available in the input stream.
  9705. * This function has a side effect of (possibly) incrementing ctxt->checkIndex
  9706. * to avoid rescanning sequences of bytes, it DOES change the state of the
  9707. * parser, do not use liberally.
  9708. *
  9709. * Returns the index to the current parsing point if the full sequence
  9710. * is available, -1 otherwise.
  9711. */
  9712. static int
  9713. xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
  9714. xmlChar next, xmlChar third) {
  9715. int base, len;
  9716. xmlParserInputPtr in;
  9717. const xmlChar *buf;
  9718. in = ctxt->input;
  9719. if (in == NULL) return(-1);
  9720. base = in->cur - in->base;
  9721. if (base < 0) return(-1);
  9722. if (ctxt->checkIndex > base)
  9723. base = ctxt->checkIndex;
  9724. if (in->buf == NULL) {
  9725. buf = in->base;
  9726. len = in->length;
  9727. } else {
  9728. buf = in->buf->buffer->content;
  9729. len = in->buf->buffer->use;
  9730. }
  9731. /* take into account the sequence length */
  9732. if (third) len -= 2;
  9733. else if (next) len --;
  9734. for (;base < len;base++) {
  9735. if (buf[base] == first) {
  9736. if (third != 0) {
  9737. if ((buf[base + 1] != next) ||
  9738. (buf[base + 2] != third)) continue;
  9739. } else if (next != 0) {
  9740. if (buf[base + 1] != next) continue;
  9741. }
  9742. ctxt->checkIndex = 0;
  9743. #ifdef DEBUG_PUSH
  9744. if (next == 0)
  9745. xmlGenericError(xmlGenericErrorContext,
  9746. "PP: lookup '%c' found at %d\n",
  9747. first, base);
  9748. else if (third == 0)
  9749. xmlGenericError(xmlGenericErrorContext,
  9750. "PP: lookup '%c%c' found at %d\n",
  9751. first, next, base);
  9752. else
  9753. xmlGenericError(xmlGenericErrorContext,
  9754. "PP: lookup '%c%c%c' found at %d\n",
  9755. first, next, third, base);
  9756. #endif
  9757. return(base - (in->cur - in->base));
  9758. }
  9759. }
  9760. ctxt->checkIndex = base;
  9761. #ifdef DEBUG_PUSH
  9762. if (next == 0)
  9763. xmlGenericError(xmlGenericErrorContext,
  9764. "PP: lookup '%c' failed\n", first);
  9765. else if (third == 0)
  9766. xmlGenericError(xmlGenericErrorContext,
  9767. "PP: lookup '%c%c' failed\n", first, next);
  9768. else
  9769. xmlGenericError(xmlGenericErrorContext,
  9770. "PP: lookup '%c%c%c' failed\n", first, next, third);
  9771. #endif
  9772. return(-1);
  9773. }
  9774. /**
  9775. * xmlParseGetLasts:
  9776. * @ctxt: an XML parser context
  9777. * @lastlt: pointer to store the last '<' from the input
  9778. * @lastgt: pointer to store the last '>' from the input
  9779. *
  9780. * Lookup the last < and > in the current chunk
  9781. */
  9782. static void
  9783. xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
  9784. const xmlChar **lastgt) {
  9785. const xmlChar *tmp;
  9786. if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
  9787. xmlGenericError(xmlGenericErrorContext,
  9788. "Internal error: xmlParseGetLasts\n");
  9789. return;
  9790. }
  9791. if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
  9792. tmp = ctxt->input->end;
  9793. tmp--;
  9794. while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
  9795. if (tmp < ctxt->input->base) {
  9796. *lastlt = NULL;
  9797. *lastgt = NULL;
  9798. } else {
  9799. *lastlt = tmp;
  9800. tmp++;
  9801. while ((tmp < ctxt->input->end) && (*tmp != '>')) {
  9802. if (*tmp == '\'') {
  9803. tmp++;
  9804. while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
  9805. if (tmp < ctxt->input->end) tmp++;
  9806. } else if (*tmp == '"') {
  9807. tmp++;
  9808. while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
  9809. if (tmp < ctxt->input->end) tmp++;
  9810. } else
  9811. tmp++;
  9812. }
  9813. if (tmp < ctxt->input->end)
  9814. *lastgt = tmp;
  9815. else {
  9816. tmp = *lastlt;
  9817. tmp--;
  9818. while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
  9819. if (tmp >= ctxt->input->base)
  9820. *lastgt = tmp;
  9821. else
  9822. *lastgt = NULL;
  9823. }
  9824. }
  9825. } else {
  9826. *lastlt = NULL;
  9827. *lastgt = NULL;
  9828. }
  9829. }
  9830. /**
  9831. * xmlCheckCdataPush:
  9832. * @cur: pointer to the bock of characters
  9833. * @len: length of the block in bytes
  9834. *
  9835. * Check that the block of characters is okay as SCdata content [20]
  9836. *
  9837. * Returns the number of bytes to pass if okay, a negative index where an
  9838. * UTF-8 error occured otherwise
  9839. */
  9840. static int
  9841. xmlCheckCdataPush(const xmlChar *utf, int len) {
  9842. int ix;
  9843. unsigned char c;
  9844. int codepoint;
  9845. if ((utf == NULL) || (len <= 0))
  9846. return(0);
  9847. for (ix = 0; ix < len;) { /* string is 0-terminated */
  9848. c = utf[ix];
  9849. if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
  9850. if (c >= 0x20)
  9851. ix++;
  9852. else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
  9853. ix++;
  9854. else
  9855. return(-ix);
  9856. } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
  9857. if (ix + 2 > len) return(ix);
  9858. if ((utf[ix+1] & 0xc0 ) != 0x80)
  9859. return(-ix);
  9860. codepoint = (utf[ix] & 0x1f) << 6;
  9861. codepoint |= utf[ix+1] & 0x3f;
  9862. if (!xmlIsCharQ(codepoint))
  9863. return(-ix);
  9864. ix += 2;
  9865. } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
  9866. if (ix + 3 > len) return(ix);
  9867. if (((utf[ix+1] & 0xc0) != 0x80) ||
  9868. ((utf[ix+2] & 0xc0) != 0x80))
  9869. return(-ix);
  9870. codepoint = (utf[ix] & 0xf) << 12;
  9871. codepoint |= (utf[ix+1] & 0x3f) << 6;
  9872. codepoint |= utf[ix+2] & 0x3f;
  9873. if (!xmlIsCharQ(codepoint))
  9874. return(-ix);
  9875. ix += 3;
  9876. } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
  9877. if (ix + 4 > len) return(ix);
  9878. if (((utf[ix+1] & 0xc0) != 0x80) ||
  9879. ((utf[ix+2] & 0xc0) != 0x80) ||
  9880. ((utf[ix+3] & 0xc0) != 0x80))
  9881. return(-ix);
  9882. codepoint = (utf[ix] & 0x7) << 18;
  9883. codepoint |= (utf[ix+1] & 0x3f) << 12;
  9884. codepoint |= (utf[ix+2] & 0x3f) << 6;
  9885. codepoint |= utf[ix+3] & 0x3f;
  9886. if (!xmlIsCharQ(codepoint))
  9887. return(-ix);
  9888. ix += 4;
  9889. } else /* unknown encoding */
  9890. return(-ix);
  9891. }
  9892. return(ix);
  9893. }
  9894. /**
  9895. * xmlParseTryOrFinish:
  9896. * @ctxt: an XML parser context
  9897. * @terminate: last chunk indicator
  9898. *
  9899. * Try to progress on parsing
  9900. *
  9901. * Returns zero if no parsing was possible
  9902. */
  9903. static int
  9904. xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
  9905. int ret = 0;
  9906. int avail, tlen;
  9907. xmlChar cur, next;
  9908. const xmlChar *lastlt, *lastgt;
  9909. if (ctxt->input == NULL)
  9910. return(0);
  9911. #ifdef DEBUG_PUSH
  9912. switch (ctxt->instate) {
  9913. case XML_PARSER_EOF:
  9914. xmlGenericError(xmlGenericErrorContext,
  9915. "PP: try EOF\n"); break;
  9916. case XML_PARSER_START:
  9917. xmlGenericError(xmlGenericErrorContext,
  9918. "PP: try START\n"); break;
  9919. case XML_PARSER_MISC:
  9920. xmlGenericError(xmlGenericErrorContext,
  9921. "PP: try MISC\n");break;
  9922. case XML_PARSER_COMMENT:
  9923. xmlGenericError(xmlGenericErrorContext,
  9924. "PP: try COMMENT\n");break;
  9925. case XML_PARSER_PROLOG:
  9926. xmlGenericError(xmlGenericErrorContext,
  9927. "PP: try PROLOG\n");break;
  9928. case XML_PARSER_START_TAG:
  9929. xmlGenericError(xmlGenericErrorContext,
  9930. "PP: try START_TAG\n");break;
  9931. case XML_PARSER_CONTENT:
  9932. xmlGenericError(xmlGenericErrorContext,
  9933. "PP: try CONTENT\n");break;
  9934. case XML_PARSER_CDATA_SECTION:
  9935. xmlGenericError(xmlGenericErrorContext,
  9936. "PP: try CDATA_SECTION\n");break;
  9937. case XML_PARSER_END_TAG:
  9938. xmlGenericError(xmlGenericErrorContext,
  9939. "PP: try END_TAG\n");break;
  9940. case XML_PARSER_ENTITY_DECL:
  9941. xmlGenericError(xmlGenericErrorContext,
  9942. "PP: try ENTITY_DECL\n");break;
  9943. case XML_PARSER_ENTITY_VALUE:
  9944. xmlGenericError(xmlGenericErrorContext,
  9945. "PP: try ENTITY_VALUE\n");break;
  9946. case XML_PARSER_ATTRIBUTE_VALUE:
  9947. xmlGenericError(xmlGenericErrorContext,
  9948. "PP: try ATTRIBUTE_VALUE\n");break;
  9949. case XML_PARSER_DTD:
  9950. xmlGenericError(xmlGenericErrorContext,
  9951. "PP: try DTD\n");break;
  9952. case XML_PARSER_EPILOG:
  9953. xmlGenericError(xmlGenericErrorContext,
  9954. "PP: try EPILOG\n");break;
  9955. case XML_PARSER_PI:
  9956. xmlGenericError(xmlGenericErrorContext,
  9957. "PP: try PI\n");break;
  9958. case XML_PARSER_IGNORE:
  9959. xmlGenericError(xmlGenericErrorContext,
  9960. "PP: try IGNORE\n");break;
  9961. }
  9962. #endif
  9963. if ((ctxt->input != NULL) &&
  9964. (ctxt->input->cur - ctxt->input->base > 4096)) {
  9965. xmlSHRINK(ctxt);
  9966. ctxt->checkIndex = 0;
  9967. }
  9968. xmlParseGetLasts(ctxt, &lastlt, &lastgt);
  9969. while (1) {
  9970. if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
  9971. return(0);
  9972. /*
  9973. * Pop-up of finished entities.
  9974. */
  9975. while ((RAW == 0) && (ctxt->inputNr > 1))
  9976. xmlPopInput(ctxt);
  9977. if (ctxt->input == NULL) break;
  9978. if (ctxt->input->buf == NULL)
  9979. avail = ctxt->input->length -
  9980. (ctxt->input->cur - ctxt->input->base);
  9981. else {
  9982. /*
  9983. * If we are operating on converted input, try to flush
  9984. * remainng chars to avoid them stalling in the non-converted
  9985. * buffer.
  9986. */
  9987. if ((ctxt->input->buf->raw != NULL) &&
  9988. (ctxt->input->buf->raw->use > 0)) {
  9989. int base = ctxt->input->base -
  9990. ctxt->input->buf->buffer->content;
  9991. int current = ctxt->input->cur - ctxt->input->base;
  9992. xmlParserInputBufferPush(ctxt->input->buf, 0, "");
  9993. ctxt->input->base = ctxt->input->buf->buffer->content + base;
  9994. ctxt->input->cur = ctxt->input->base + current;
  9995. ctxt->input->end =
  9996. &ctxt->input->buf->buffer->content[
  9997. ctxt->input->buf->buffer->use];
  9998. }
  9999. avail = ctxt->input->buf->buffer->use -
  10000. (ctxt->input->cur - ctxt->input->base);
  10001. }
  10002. if (avail < 1)
  10003. goto done;
  10004. switch (ctxt->instate) {
  10005. case XML_PARSER_EOF:
  10006. /*
  10007. * Document parsing is done !
  10008. */
  10009. goto done;
  10010. case XML_PARSER_START:
  10011. if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
  10012. xmlChar start[4];
  10013. xmlCharEncoding enc;
  10014. /*
  10015. * Very first chars read from the document flow.
  10016. */
  10017. if (avail < 4)
  10018. goto done;
  10019. /*
  10020. * Get the 4 first bytes and decode the charset
  10021. * if enc != XML_CHAR_ENCODING_NONE
  10022. * plug some encoding conversion routines,
  10023. * else xmlSwitchEncoding will set to (default)
  10024. * UTF8.
  10025. */
  10026. start[0] = RAW;
  10027. start[1] = NXT(1);
  10028. start[2] = NXT(2);
  10029. start[3] = NXT(3);
  10030. enc = xmlDetectCharEncoding(start, 4);
  10031. xmlSwitchEncoding(ctxt, enc);
  10032. break;
  10033. }
  10034. if (avail < 2)
  10035. goto done;
  10036. cur = ctxt->input->cur[0];
  10037. next = ctxt->input->cur[1];
  10038. if (cur == 0) {
  10039. if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
  10040. ctxt->sax->setDocumentLocator(ctxt->userData,
  10041. &xmlDefaultSAXLocator);
  10042. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
  10043. ctxt->instate = XML_PARSER_EOF;
  10044. #ifdef DEBUG_PUSH
  10045. xmlGenericError(xmlGenericErrorContext,
  10046. "PP: entering EOF\n");
  10047. #endif
  10048. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  10049. ctxt->sax->endDocument(ctxt->userData);
  10050. goto done;
  10051. }
  10052. if ((cur == '<') && (next == '?')) {
  10053. /* PI or XML decl */
  10054. if (avail < 5) return(ret);
  10055. if ((!terminate) &&
  10056. (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
  10057. return(ret);
  10058. if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
  10059. ctxt->sax->setDocumentLocator(ctxt->userData,
  10060. &xmlDefaultSAXLocator);
  10061. if ((ctxt->input->cur[2] == 'x') &&
  10062. (ctxt->input->cur[3] == 'm') &&
  10063. (ctxt->input->cur[4] == 'l') &&
  10064. (IS_BLANK_CH(ctxt->input->cur[5]))) {
  10065. ret += 5;
  10066. #ifdef DEBUG_PUSH
  10067. xmlGenericError(xmlGenericErrorContext,
  10068. "PP: Parsing XML Decl\n");
  10069. #endif
  10070. xmlParseXMLDecl(ctxt);
  10071. if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
  10072. /*
  10073. * The XML REC instructs us to stop parsing right
  10074. * here
  10075. */
  10076. ctxt->instate = XML_PARSER_EOF;
  10077. return(0);
  10078. }
  10079. ctxt->standalone = ctxt->input->standalone;
  10080. if ((ctxt->encoding == NULL) &&
  10081. (ctxt->input->encoding != NULL))
  10082. ctxt->encoding = xmlStrdup(ctxt->input->encoding);
  10083. if ((ctxt->sax) && (ctxt->sax->startDocument) &&
  10084. (!ctxt->disableSAX))
  10085. ctxt->sax->startDocument(ctxt->userData);
  10086. ctxt->instate = XML_PARSER_MISC;
  10087. #ifdef DEBUG_PUSH
  10088. xmlGenericError(xmlGenericErrorContext,
  10089. "PP: entering MISC\n");
  10090. #endif
  10091. } else {
  10092. ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
  10093. if ((ctxt->sax) && (ctxt->sax->startDocument) &&
  10094. (!ctxt->disableSAX))
  10095. ctxt->sax->startDocument(ctxt->userData);
  10096. ctxt->instate = XML_PARSER_MISC;
  10097. #ifdef DEBUG_PUSH
  10098. xmlGenericError(xmlGenericErrorContext,
  10099. "PP: entering MISC\n");
  10100. #endif
  10101. }
  10102. } else {
  10103. if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
  10104. ctxt->sax->setDocumentLocator(ctxt->userData,
  10105. &xmlDefaultSAXLocator);
  10106. ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
  10107. if (ctxt->version == NULL) {
  10108. xmlErrMemory(ctxt, NULL);
  10109. break;
  10110. }
  10111. if ((ctxt->sax) && (ctxt->sax->startDocument) &&
  10112. (!ctxt->disableSAX))
  10113. ctxt->sax->startDocument(ctxt->userData);
  10114. ctxt->instate = XML_PARSER_MISC;
  10115. #ifdef DEBUG_PUSH
  10116. xmlGenericError(xmlGenericErrorContext,
  10117. "PP: entering MISC\n");
  10118. #endif
  10119. }
  10120. break;
  10121. case XML_PARSER_START_TAG: {
  10122. const xmlChar *name;
  10123. const xmlChar *prefix = NULL;
  10124. const xmlChar *URI = NULL;
  10125. int nsNr = ctxt->nsNr;
  10126. if ((avail < 2) && (ctxt->inputNr == 1))
  10127. goto done;
  10128. cur = ctxt->input->cur[0];
  10129. if (cur != '<') {
  10130. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
  10131. ctxt->instate = XML_PARSER_EOF;
  10132. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  10133. ctxt->sax->endDocument(ctxt->userData);
  10134. goto done;
  10135. }
  10136. if (!terminate) {
  10137. if (ctxt->progressive) {
  10138. /* > can be found unescaped in attribute values */
  10139. if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
  10140. goto done;
  10141. } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
  10142. goto done;
  10143. }
  10144. }
  10145. if (ctxt->spaceNr == 0)
  10146. spacePush(ctxt, -1);
  10147. else if (*ctxt->space == -2)
  10148. spacePush(ctxt, -1);
  10149. else
  10150. spacePush(ctxt, *ctxt->space);
  10151. #ifdef LIBXML_SAX1_ENABLED
  10152. if (ctxt->sax2)
  10153. #endif /* LIBXML_SAX1_ENABLED */
  10154. name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
  10155. #ifdef LIBXML_SAX1_ENABLED
  10156. else
  10157. name = xmlParseStartTag(ctxt);
  10158. #endif /* LIBXML_SAX1_ENABLED */
  10159. if (name == NULL) {
  10160. spacePop(ctxt);
  10161. ctxt->instate = XML_PARSER_EOF;
  10162. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  10163. ctxt->sax->endDocument(ctxt->userData);
  10164. goto done;
  10165. }
  10166. #ifdef LIBXML_VALID_ENABLED
  10167. /*
  10168. * [ VC: Root Element Type ]
  10169. * The Name in the document type declaration must match
  10170. * the element type of the root element.
  10171. */
  10172. if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
  10173. ctxt->node && (ctxt->node == ctxt->myDoc->children))
  10174. ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
  10175. #endif /* LIBXML_VALID_ENABLED */
  10176. /*
  10177. * Check for an Empty Element.
  10178. */
  10179. if ((RAW == '/') && (NXT(1) == '>')) {
  10180. SKIP(2);
  10181. if (ctxt->sax2) {
  10182. if ((ctxt->sax != NULL) &&
  10183. (ctxt->sax->endElementNs != NULL) &&
  10184. (!ctxt->disableSAX))
  10185. ctxt->sax->endElementNs(ctxt->userData, name,
  10186. prefix, URI);
  10187. if (ctxt->nsNr - nsNr > 0)
  10188. nsPop(ctxt, ctxt->nsNr - nsNr);
  10189. #ifdef LIBXML_SAX1_ENABLED
  10190. } else {
  10191. if ((ctxt->sax != NULL) &&
  10192. (ctxt->sax->endElement != NULL) &&
  10193. (!ctxt->disableSAX))
  10194. ctxt->sax->endElement(ctxt->userData, name);
  10195. #endif /* LIBXML_SAX1_ENABLED */
  10196. }
  10197. spacePop(ctxt);
  10198. if (ctxt->nameNr == 0) {
  10199. ctxt->instate = XML_PARSER_EPILOG;
  10200. } else {
  10201. ctxt->instate = XML_PARSER_CONTENT;
  10202. }
  10203. break;
  10204. }
  10205. if (RAW == '>') {
  10206. NEXT;
  10207. } else {
  10208. xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
  10209. "Couldn't find end of Start Tag %s\n",
  10210. name);
  10211. nodePop(ctxt);
  10212. spacePop(ctxt);
  10213. }
  10214. if (ctxt->sax2)
  10215. nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
  10216. #ifdef LIBXML_SAX1_ENABLED
  10217. else
  10218. namePush(ctxt, name);
  10219. #endif /* LIBXML_SAX1_ENABLED */
  10220. ctxt->instate = XML_PARSER_CONTENT;
  10221. break;
  10222. }
  10223. case XML_PARSER_CONTENT: {
  10224. const xmlChar *test;
  10225. unsigned int cons;
  10226. if ((avail < 2) && (ctxt->inputNr == 1))
  10227. goto done;
  10228. cur = ctxt->input->cur[0];
  10229. next = ctxt->input->cur[1];
  10230. test = CUR_PTR;
  10231. cons = ctxt->input->consumed;
  10232. if ((cur == '<') && (next == '/')) {
  10233. ctxt->instate = XML_PARSER_END_TAG;
  10234. break;
  10235. } else if ((cur == '<') && (next == '?')) {
  10236. if ((!terminate) &&
  10237. (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
  10238. goto done;
  10239. xmlParsePI(ctxt);
  10240. } else if ((cur == '<') && (next != '!')) {
  10241. ctxt->instate = XML_PARSER_START_TAG;
  10242. break;
  10243. } else if ((cur == '<') && (next == '!') &&
  10244. (ctxt->input->cur[2] == '-') &&
  10245. (ctxt->input->cur[3] == '-')) {
  10246. int term;
  10247. if (avail < 4)
  10248. goto done;
  10249. ctxt->input->cur += 4;
  10250. term = xmlParseLookupSequence(ctxt, '-', '-', '>');
  10251. ctxt->input->cur -= 4;
  10252. if ((!terminate) && (term < 0))
  10253. goto done;
  10254. xmlParseComment(ctxt);
  10255. ctxt->instate = XML_PARSER_CONTENT;
  10256. } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
  10257. (ctxt->input->cur[2] == '[') &&
  10258. (ctxt->input->cur[3] == 'C') &&
  10259. (ctxt->input->cur[4] == 'D') &&
  10260. (ctxt->input->cur[5] == 'A') &&
  10261. (ctxt->input->cur[6] == 'T') &&
  10262. (ctxt->input->cur[7] == 'A') &&
  10263. (ctxt->input->cur[8] == '[')) {
  10264. SKIP(9);
  10265. ctxt->instate = XML_PARSER_CDATA_SECTION;
  10266. break;
  10267. } else if ((cur == '<') && (next == '!') &&
  10268. (avail < 9)) {
  10269. goto done;
  10270. } else if (cur == '&') {
  10271. if ((!terminate) &&
  10272. (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
  10273. goto done;
  10274. xmlParseReference(ctxt);
  10275. } else {
  10276. /* TODO Avoid the extra copy, handle directly !!! */
  10277. /*
  10278. * Goal of the following test is:
  10279. * - minimize calls to the SAX 'character' callback
  10280. * when they are mergeable
  10281. * - handle an problem for isBlank when we only parse
  10282. * a sequence of blank chars and the next one is
  10283. * not available to check against '<' presence.
  10284. * - tries to homogenize the differences in SAX
  10285. * callbacks between the push and pull versions
  10286. * of the parser.
  10287. */
  10288. if ((ctxt->inputNr == 1) &&
  10289. (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
  10290. if (!terminate) {
  10291. if (ctxt->progressive) {
  10292. if ((lastlt == NULL) ||
  10293. (ctxt->input->cur > lastlt))
  10294. goto done;
  10295. } else if (xmlParseLookupSequence(ctxt,
  10296. '<', 0, 0) < 0) {
  10297. goto done;
  10298. }
  10299. }
  10300. }
  10301. ctxt->checkIndex = 0;
  10302. xmlParseCharData(ctxt, 0);
  10303. }
  10304. /*
  10305. * Pop-up of finished entities.
  10306. */
  10307. while ((RAW == 0) && (ctxt->inputNr > 1))
  10308. xmlPopInput(ctxt);
  10309. if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
  10310. xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
  10311. "detected an error in element content\n");
  10312. ctxt->instate = XML_PARSER_EOF;
  10313. break;
  10314. }
  10315. break;
  10316. }
  10317. case XML_PARSER_END_TAG:
  10318. if (avail < 2)
  10319. goto done;
  10320. if (!terminate) {
  10321. if (ctxt->progressive) {
  10322. /* > can be found unescaped in attribute values */
  10323. if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
  10324. goto done;
  10325. } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
  10326. goto done;
  10327. }
  10328. }
  10329. if (ctxt->sax2) {
  10330. xmlParseEndTag2(ctxt,
  10331. (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
  10332. (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
  10333. (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
  10334. nameNsPop(ctxt);
  10335. }
  10336. #ifdef LIBXML_SAX1_ENABLED
  10337. else
  10338. xmlParseEndTag1(ctxt, 0);
  10339. #endif /* LIBXML_SAX1_ENABLED */
  10340. if (ctxt->nameNr == 0) {
  10341. ctxt->instate = XML_PARSER_EPILOG;
  10342. } else {
  10343. ctxt->instate = XML_PARSER_CONTENT;
  10344. }
  10345. break;
  10346. case XML_PARSER_CDATA_SECTION: {
  10347. /*
  10348. * The Push mode need to have the SAX callback for
  10349. * cdataBlock merge back contiguous callbacks.
  10350. */
  10351. int base;
  10352. base = xmlParseLookupSequence(ctxt, ']', ']', '>');
  10353. if (base < 0) {
  10354. if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
  10355. int tmp;
  10356. tmp = xmlCheckCdataPush(ctxt->input->cur,
  10357. XML_PARSER_BIG_BUFFER_SIZE);
  10358. if (tmp < 0) {
  10359. tmp = -tmp;
  10360. ctxt->input->cur += tmp;
  10361. goto encoding_error;
  10362. }
  10363. if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
  10364. if (ctxt->sax->cdataBlock != NULL)
  10365. ctxt->sax->cdataBlock(ctxt->userData,
  10366. ctxt->input->cur, tmp);
  10367. else if (ctxt->sax->characters != NULL)
  10368. ctxt->sax->characters(ctxt->userData,
  10369. ctxt->input->cur, tmp);
  10370. }
  10371. SKIPL(tmp);
  10372. ctxt->checkIndex = 0;
  10373. }
  10374. goto done;
  10375. } else {
  10376. int tmp;
  10377. tmp = xmlCheckCdataPush(ctxt->input->cur, base);
  10378. if ((tmp < 0) || (tmp != base)) {
  10379. tmp = -tmp;
  10380. ctxt->input->cur += tmp;
  10381. goto encoding_error;
  10382. }
  10383. if ((ctxt->sax != NULL) && (base == 0) &&
  10384. (ctxt->sax->cdataBlock != NULL) &&
  10385. (!ctxt->disableSAX)) {
  10386. /*
  10387. * Special case to provide identical behaviour
  10388. * between pull and push parsers on enpty CDATA
  10389. * sections
  10390. */
  10391. if ((ctxt->input->cur - ctxt->input->base >= 9) &&
  10392. (!strncmp((const char *)&ctxt->input->cur[-9],
  10393. "<![CDATA[", 9)))
  10394. ctxt->sax->cdataBlock(ctxt->userData,
  10395. BAD_CAST "", 0);
  10396. } else if ((ctxt->sax != NULL) && (base > 0) &&
  10397. (!ctxt->disableSAX)) {
  10398. if (ctxt->sax->cdataBlock != NULL)
  10399. ctxt->sax->cdataBlock(ctxt->userData,
  10400. ctxt->input->cur, base);
  10401. else if (ctxt->sax->characters != NULL)
  10402. ctxt->sax->characters(ctxt->userData,
  10403. ctxt->input->cur, base);
  10404. }
  10405. SKIPL(base + 3);
  10406. ctxt->checkIndex = 0;
  10407. ctxt->instate = XML_PARSER_CONTENT;
  10408. #ifdef DEBUG_PUSH
  10409. xmlGenericError(xmlGenericErrorContext,
  10410. "PP: entering CONTENT\n");
  10411. #endif
  10412. }
  10413. break;
  10414. }
  10415. case XML_PARSER_MISC:
  10416. SKIP_BLANKS;
  10417. if (ctxt->input->buf == NULL)
  10418. avail = ctxt->input->length -
  10419. (ctxt->input->cur - ctxt->input->base);
  10420. else
  10421. avail = ctxt->input->buf->buffer->use -
  10422. (ctxt->input->cur - ctxt->input->base);
  10423. if (avail < 2)
  10424. goto done;
  10425. cur = ctxt->input->cur[0];
  10426. next = ctxt->input->cur[1];
  10427. if ((cur == '<') && (next == '?')) {
  10428. if ((!terminate) &&
  10429. (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
  10430. goto done;
  10431. #ifdef DEBUG_PUSH
  10432. xmlGenericError(xmlGenericErrorContext,
  10433. "PP: Parsing PI\n");
  10434. #endif
  10435. xmlParsePI(ctxt);
  10436. ctxt->checkIndex = 0;
  10437. } else if ((cur == '<') && (next == '!') &&
  10438. (ctxt->input->cur[2] == '-') &&
  10439. (ctxt->input->cur[3] == '-')) {
  10440. if ((!terminate) &&
  10441. (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
  10442. goto done;
  10443. #ifdef DEBUG_PUSH
  10444. xmlGenericError(xmlGenericErrorContext,
  10445. "PP: Parsing Comment\n");
  10446. #endif
  10447. xmlParseComment(ctxt);
  10448. ctxt->instate = XML_PARSER_MISC;
  10449. ctxt->checkIndex = 0;
  10450. } else if ((cur == '<') && (next == '!') &&
  10451. (ctxt->input->cur[2] == 'D') &&
  10452. (ctxt->input->cur[3] == 'O') &&
  10453. (ctxt->input->cur[4] == 'C') &&
  10454. (ctxt->input->cur[5] == 'T') &&
  10455. (ctxt->input->cur[6] == 'Y') &&
  10456. (ctxt->input->cur[7] == 'P') &&
  10457. (ctxt->input->cur[8] == 'E')) {
  10458. if ((!terminate) &&
  10459. (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
  10460. goto done;
  10461. #ifdef DEBUG_PUSH
  10462. xmlGenericError(xmlGenericErrorContext,
  10463. "PP: Parsing internal subset\n");
  10464. #endif
  10465. ctxt->inSubset = 1;
  10466. xmlParseDocTypeDecl(ctxt);
  10467. if (RAW == '[') {
  10468. ctxt->instate = XML_PARSER_DTD;
  10469. #ifdef DEBUG_PUSH
  10470. xmlGenericError(xmlGenericErrorContext,
  10471. "PP: entering DTD\n");
  10472. #endif
  10473. } else {
  10474. /*
  10475. * Create and update the external subset.
  10476. */
  10477. ctxt->inSubset = 2;
  10478. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  10479. (ctxt->sax->externalSubset != NULL))
  10480. ctxt->sax->externalSubset(ctxt->userData,
  10481. ctxt->intSubName, ctxt->extSubSystem,
  10482. ctxt->extSubURI);
  10483. ctxt->inSubset = 0;
  10484. xmlCleanSpecialAttr(ctxt);
  10485. ctxt->instate = XML_PARSER_PROLOG;
  10486. #ifdef DEBUG_PUSH
  10487. xmlGenericError(xmlGenericErrorContext,
  10488. "PP: entering PROLOG\n");
  10489. #endif
  10490. }
  10491. } else if ((cur == '<') && (next == '!') &&
  10492. (avail < 9)) {
  10493. goto done;
  10494. } else {
  10495. ctxt->instate = XML_PARSER_START_TAG;
  10496. ctxt->progressive = 1;
  10497. xmlParseGetLasts(ctxt, &lastlt, &lastgt);
  10498. #ifdef DEBUG_PUSH
  10499. xmlGenericError(xmlGenericErrorContext,
  10500. "PP: entering START_TAG\n");
  10501. #endif
  10502. }
  10503. break;
  10504. case XML_PARSER_PROLOG:
  10505. SKIP_BLANKS;
  10506. if (ctxt->input->buf == NULL)
  10507. avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
  10508. else
  10509. avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
  10510. if (avail < 2)
  10511. goto done;
  10512. cur = ctxt->input->cur[0];
  10513. next = ctxt->input->cur[1];
  10514. if ((cur == '<') && (next == '?')) {
  10515. if ((!terminate) &&
  10516. (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
  10517. goto done;
  10518. #ifdef DEBUG_PUSH
  10519. xmlGenericError(xmlGenericErrorContext,
  10520. "PP: Parsing PI\n");
  10521. #endif
  10522. xmlParsePI(ctxt);
  10523. } else if ((cur == '<') && (next == '!') &&
  10524. (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
  10525. if ((!terminate) &&
  10526. (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
  10527. goto done;
  10528. #ifdef DEBUG_PUSH
  10529. xmlGenericError(xmlGenericErrorContext,
  10530. "PP: Parsing Comment\n");
  10531. #endif
  10532. xmlParseComment(ctxt);
  10533. ctxt->instate = XML_PARSER_PROLOG;
  10534. } else if ((cur == '<') && (next == '!') &&
  10535. (avail < 4)) {
  10536. goto done;
  10537. } else {
  10538. ctxt->instate = XML_PARSER_START_TAG;
  10539. if (ctxt->progressive == 0)
  10540. ctxt->progressive = 1;
  10541. xmlParseGetLasts(ctxt, &lastlt, &lastgt);
  10542. #ifdef DEBUG_PUSH
  10543. xmlGenericError(xmlGenericErrorContext,
  10544. "PP: entering START_TAG\n");
  10545. #endif
  10546. }
  10547. break;
  10548. case XML_PARSER_EPILOG:
  10549. SKIP_BLANKS;
  10550. if (ctxt->input->buf == NULL)
  10551. avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
  10552. else
  10553. avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
  10554. if (avail < 2)
  10555. goto done;
  10556. cur = ctxt->input->cur[0];
  10557. next = ctxt->input->cur[1];
  10558. if ((cur == '<') && (next == '?')) {
  10559. if ((!terminate) &&
  10560. (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
  10561. goto done;
  10562. #ifdef DEBUG_PUSH
  10563. xmlGenericError(xmlGenericErrorContext,
  10564. "PP: Parsing PI\n");
  10565. #endif
  10566. xmlParsePI(ctxt);
  10567. ctxt->instate = XML_PARSER_EPILOG;
  10568. } else if ((cur == '<') && (next == '!') &&
  10569. (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
  10570. if ((!terminate) &&
  10571. (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
  10572. goto done;
  10573. #ifdef DEBUG_PUSH
  10574. xmlGenericError(xmlGenericErrorContext,
  10575. "PP: Parsing Comment\n");
  10576. #endif
  10577. xmlParseComment(ctxt);
  10578. ctxt->instate = XML_PARSER_EPILOG;
  10579. } else if ((cur == '<') && (next == '!') &&
  10580. (avail < 4)) {
  10581. goto done;
  10582. } else {
  10583. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
  10584. ctxt->instate = XML_PARSER_EOF;
  10585. #ifdef DEBUG_PUSH
  10586. xmlGenericError(xmlGenericErrorContext,
  10587. "PP: entering EOF\n");
  10588. #endif
  10589. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  10590. ctxt->sax->endDocument(ctxt->userData);
  10591. goto done;
  10592. }
  10593. break;
  10594. case XML_PARSER_DTD: {
  10595. /*
  10596. * Sorry but progressive parsing of the internal subset
  10597. * is not expected to be supported. We first check that
  10598. * the full content of the internal subset is available and
  10599. * the parsing is launched only at that point.
  10600. * Internal subset ends up with "']' S? '>'" in an unescaped
  10601. * section and not in a ']]>' sequence which are conditional
  10602. * sections (whoever argued to keep that crap in XML deserve
  10603. * a place in hell !).
  10604. */
  10605. int base, i;
  10606. xmlChar *buf;
  10607. xmlChar quote = 0;
  10608. base = ctxt->input->cur - ctxt->input->base;
  10609. if (base < 0) return(0);
  10610. if (ctxt->checkIndex > base)
  10611. base = ctxt->checkIndex;
  10612. buf = ctxt->input->buf->buffer->content;
  10613. for (;(unsigned int) base < ctxt->input->buf->buffer->use;
  10614. base++) {
  10615. if (quote != 0) {
  10616. if (buf[base] == quote)
  10617. quote = 0;
  10618. continue;
  10619. }
  10620. if ((quote == 0) && (buf[base] == '<')) {
  10621. int found = 0;
  10622. /* special handling of comments */
  10623. if (((unsigned int) base + 4 <
  10624. ctxt->input->buf->buffer->use) &&
  10625. (buf[base + 1] == '!') &&
  10626. (buf[base + 2] == '-') &&
  10627. (buf[base + 3] == '-')) {
  10628. for (;(unsigned int) base + 3 <
  10629. ctxt->input->buf->buffer->use; base++) {
  10630. if ((buf[base] == '-') &&
  10631. (buf[base + 1] == '-') &&
  10632. (buf[base + 2] == '>')) {
  10633. found = 1;
  10634. base += 2;
  10635. break;
  10636. }
  10637. }
  10638. if (!found) {
  10639. #if 0
  10640. fprintf(stderr, "unfinished comment\n");
  10641. #endif
  10642. break; /* for */
  10643. }
  10644. continue;
  10645. }
  10646. }
  10647. if (buf[base] == '"') {
  10648. quote = '"';
  10649. continue;
  10650. }
  10651. if (buf[base] == '\'') {
  10652. quote = '\'';
  10653. continue;
  10654. }
  10655. if (buf[base] == ']') {
  10656. #if 0
  10657. fprintf(stderr, "%c%c%c%c: ", buf[base],
  10658. buf[base + 1], buf[base + 2], buf[base + 3]);
  10659. #endif
  10660. if ((unsigned int) base +1 >=
  10661. ctxt->input->buf->buffer->use)
  10662. break;
  10663. if (buf[base + 1] == ']') {
  10664. /* conditional crap, skip both ']' ! */
  10665. base++;
  10666. continue;
  10667. }
  10668. for (i = 1;
  10669. (unsigned int) base + i < ctxt->input->buf->buffer->use;
  10670. i++) {
  10671. if (buf[base + i] == '>') {
  10672. #if 0
  10673. fprintf(stderr, "found\n");
  10674. #endif
  10675. goto found_end_int_subset;
  10676. }
  10677. if (!IS_BLANK_CH(buf[base + i])) {
  10678. #if 0
  10679. fprintf(stderr, "not found\n");
  10680. #endif
  10681. goto not_end_of_int_subset;
  10682. }
  10683. }
  10684. #if 0
  10685. fprintf(stderr, "end of stream\n");
  10686. #endif
  10687. break;
  10688. }
  10689. not_end_of_int_subset:
  10690. continue; /* for */
  10691. }
  10692. /*
  10693. * We didn't found the end of the Internal subset
  10694. */
  10695. #ifdef DEBUG_PUSH
  10696. if (next == 0)
  10697. xmlGenericError(xmlGenericErrorContext,
  10698. "PP: lookup of int subset end filed\n");
  10699. #endif
  10700. goto done;
  10701. found_end_int_subset:
  10702. xmlParseInternalSubset(ctxt);
  10703. ctxt->inSubset = 2;
  10704. if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
  10705. (ctxt->sax->externalSubset != NULL))
  10706. ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
  10707. ctxt->extSubSystem, ctxt->extSubURI);
  10708. ctxt->inSubset = 0;
  10709. xmlCleanSpecialAttr(ctxt);
  10710. ctxt->instate = XML_PARSER_PROLOG;
  10711. ctxt->checkIndex = 0;
  10712. #ifdef DEBUG_PUSH
  10713. xmlGenericError(xmlGenericErrorContext,
  10714. "PP: entering PROLOG\n");
  10715. #endif
  10716. break;
  10717. }
  10718. case XML_PARSER_COMMENT:
  10719. xmlGenericError(xmlGenericErrorContext,
  10720. "PP: internal error, state == COMMENT\n");
  10721. ctxt->instate = XML_PARSER_CONTENT;
  10722. #ifdef DEBUG_PUSH
  10723. xmlGenericError(xmlGenericErrorContext,
  10724. "PP: entering CONTENT\n");
  10725. #endif
  10726. break;
  10727. case XML_PARSER_IGNORE:
  10728. xmlGenericError(xmlGenericErrorContext,
  10729. "PP: internal error, state == IGNORE");
  10730. ctxt->instate = XML_PARSER_DTD;
  10731. #ifdef DEBUG_PUSH
  10732. xmlGenericError(xmlGenericErrorContext,
  10733. "PP: entering DTD\n");
  10734. #endif
  10735. break;
  10736. case XML_PARSER_PI:
  10737. xmlGenericError(xmlGenericErrorContext,
  10738. "PP: internal error, state == PI\n");
  10739. ctxt->instate = XML_PARSER_CONTENT;
  10740. #ifdef DEBUG_PUSH
  10741. xmlGenericError(xmlGenericErrorContext,
  10742. "PP: entering CONTENT\n");
  10743. #endif
  10744. break;
  10745. case XML_PARSER_ENTITY_DECL:
  10746. xmlGenericError(xmlGenericErrorContext,
  10747. "PP: internal error, state == ENTITY_DECL\n");
  10748. ctxt->instate = XML_PARSER_DTD;
  10749. #ifdef DEBUG_PUSH
  10750. xmlGenericError(xmlGenericErrorContext,
  10751. "PP: entering DTD\n");
  10752. #endif
  10753. break;
  10754. case XML_PARSER_ENTITY_VALUE:
  10755. xmlGenericError(xmlGenericErrorContext,
  10756. "PP: internal error, state == ENTITY_VALUE\n");
  10757. ctxt->instate = XML_PARSER_CONTENT;
  10758. #ifdef DEBUG_PUSH
  10759. xmlGenericError(xmlGenericErrorContext,
  10760. "PP: entering DTD\n");
  10761. #endif
  10762. break;
  10763. case XML_PARSER_ATTRIBUTE_VALUE:
  10764. xmlGenericError(xmlGenericErrorContext,
  10765. "PP: internal error, state == ATTRIBUTE_VALUE\n");
  10766. ctxt->instate = XML_PARSER_START_TAG;
  10767. #ifdef DEBUG_PUSH
  10768. xmlGenericError(xmlGenericErrorContext,
  10769. "PP: entering START_TAG\n");
  10770. #endif
  10771. break;
  10772. case XML_PARSER_SYSTEM_LITERAL:
  10773. xmlGenericError(xmlGenericErrorContext,
  10774. "PP: internal error, state == SYSTEM_LITERAL\n");
  10775. ctxt->instate = XML_PARSER_START_TAG;
  10776. #ifdef DEBUG_PUSH
  10777. xmlGenericError(xmlGenericErrorContext,
  10778. "PP: entering START_TAG\n");
  10779. #endif
  10780. break;
  10781. case XML_PARSER_PUBLIC_LITERAL:
  10782. xmlGenericError(xmlGenericErrorContext,
  10783. "PP: internal error, state == PUBLIC_LITERAL\n");
  10784. ctxt->instate = XML_PARSER_START_TAG;
  10785. #ifdef DEBUG_PUSH
  10786. xmlGenericError(xmlGenericErrorContext,
  10787. "PP: entering START_TAG\n");
  10788. #endif
  10789. break;
  10790. }
  10791. }
  10792. done:
  10793. #ifdef DEBUG_PUSH
  10794. xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
  10795. #endif
  10796. return(ret);
  10797. encoding_error:
  10798. {
  10799. char buffer[150];
  10800. snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
  10801. ctxt->input->cur[0], ctxt->input->cur[1],
  10802. ctxt->input->cur[2], ctxt->input->cur[3]);
  10803. __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
  10804. "Input is not proper UTF-8, indicate encoding !\n%s",
  10805. BAD_CAST buffer, NULL);
  10806. }
  10807. return(0);
  10808. }
  10809. /**
  10810. * xmlParseChunk:
  10811. * @ctxt: an XML parser context
  10812. * @chunk: an char array
  10813. * @size: the size in byte of the chunk
  10814. * @terminate: last chunk indicator
  10815. *
  10816. * Parse a Chunk of memory
  10817. *
  10818. * Returns zero if no error, the xmlParserErrors otherwise.
  10819. */
  10820. int
  10821. xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
  10822. int terminate) {
  10823. int end_in_lf = 0;
  10824. int remain = 0;
  10825. if (ctxt == NULL)
  10826. return(XML_ERR_INTERNAL_ERROR);
  10827. if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
  10828. return(ctxt->errNo);
  10829. if (ctxt->instate == XML_PARSER_START)
  10830. xmlDetectSAX2(ctxt);
  10831. if ((size > 0) && (chunk != NULL) && (!terminate) &&
  10832. (chunk[size - 1] == '\r')) {
  10833. end_in_lf = 1;
  10834. size--;
  10835. }
  10836. xmldecl_done:
  10837. if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
  10838. (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
  10839. int base = ctxt->input->base - ctxt->input->buf->buffer->content;
  10840. int cur = ctxt->input->cur - ctxt->input->base;
  10841. int res;
  10842. /*
  10843. * Specific handling if we autodetected an encoding, we should not
  10844. * push more than the first line ... which depend on the encoding
  10845. * And only push the rest once the final encoding was detected
  10846. */
  10847. if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
  10848. (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
  10849. int len = 45;
  10850. if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
  10851. BAD_CAST "UTF-16")) ||
  10852. (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
  10853. BAD_CAST "UTF16")))
  10854. len = 90;
  10855. else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
  10856. BAD_CAST "UCS-4")) ||
  10857. (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
  10858. BAD_CAST "UCS4")))
  10859. len = 180;
  10860. if (ctxt->input->buf->rawconsumed < len)
  10861. len -= ctxt->input->buf->rawconsumed;
  10862. remain = size - len;
  10863. size = len;
  10864. }
  10865. res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
  10866. if (res < 0) {
  10867. ctxt->errNo = XML_PARSER_EOF;
  10868. ctxt->disableSAX = 1;
  10869. return (XML_PARSER_EOF);
  10870. }
  10871. ctxt->input->base = ctxt->input->buf->buffer->content + base;
  10872. ctxt->input->cur = ctxt->input->base + cur;
  10873. ctxt->input->end =
  10874. &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
  10875. #ifdef DEBUG_PUSH
  10876. xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
  10877. #endif
  10878. } else if (ctxt->instate != XML_PARSER_EOF) {
  10879. if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
  10880. xmlParserInputBufferPtr in = ctxt->input->buf;
  10881. if ((in->encoder != NULL) && (in->buffer != NULL) &&
  10882. (in->raw != NULL)) {
  10883. int nbchars;
  10884. nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
  10885. if (nbchars < 0) {
  10886. /* TODO 2.6.0 */
  10887. xmlGenericError(xmlGenericErrorContext,
  10888. "xmlParseChunk: encoder error\n");
  10889. return(XML_ERR_INVALID_ENCODING);
  10890. }
  10891. }
  10892. }
  10893. }
  10894. if (remain != 0)
  10895. xmlParseTryOrFinish(ctxt, 0);
  10896. else
  10897. xmlParseTryOrFinish(ctxt, terminate);
  10898. if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
  10899. return(ctxt->errNo);
  10900. if (remain != 0) {
  10901. chunk += size;
  10902. size = remain;
  10903. remain = 0;
  10904. goto xmldecl_done;
  10905. }
  10906. if ((end_in_lf == 1) && (ctxt->input != NULL) &&
  10907. (ctxt->input->buf != NULL)) {
  10908. xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
  10909. }
  10910. if (terminate) {
  10911. /*
  10912. * Check for termination
  10913. */
  10914. int avail = 0;
  10915. if (ctxt->input != NULL) {
  10916. if (ctxt->input->buf == NULL)
  10917. avail = ctxt->input->length -
  10918. (ctxt->input->cur - ctxt->input->base);
  10919. else
  10920. avail = ctxt->input->buf->buffer->use -
  10921. (ctxt->input->cur - ctxt->input->base);
  10922. }
  10923. if ((ctxt->instate != XML_PARSER_EOF) &&
  10924. (ctxt->instate != XML_PARSER_EPILOG)) {
  10925. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
  10926. }
  10927. if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
  10928. xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
  10929. }
  10930. if (ctxt->instate != XML_PARSER_EOF) {
  10931. if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
  10932. ctxt->sax->endDocument(ctxt->userData);
  10933. }
  10934. ctxt->instate = XML_PARSER_EOF;
  10935. }
  10936. return((xmlParserErrors) ctxt->errNo);
  10937. }
  10938. /************************************************************************
  10939. * *
  10940. * I/O front end functions to the parser *
  10941. * *
  10942. ************************************************************************/
  10943. /**
  10944. * xmlCreatePushParserCtxt:
  10945. * @sax: a SAX handler
  10946. * @user_data: The user data returned on SAX callbacks
  10947. * @chunk: a pointer to an array of chars
  10948. * @size: number of chars in the array
  10949. * @filename: an optional file name or URI
  10950. *
  10951. * Create a parser context for using the XML parser in push mode.
  10952. * If @buffer and @size are non-NULL, the data is used to detect
  10953. * the encoding. The remaining characters will be parsed so they
  10954. * don't need to be fed in again through xmlParseChunk.
  10955. * To allow content encoding detection, @size should be >= 4
  10956. * The value of @filename is used for fetching external entities
  10957. * and error/warning reports.
  10958. *
  10959. * Returns the new parser context or NULL
  10960. */
  10961. xmlParserCtxtPtr
  10962. xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
  10963. const char *chunk, int size, const char *filename) {
  10964. xmlParserCtxtPtr ctxt;
  10965. xmlParserInputPtr inputStream;
  10966. xmlParserInputBufferPtr buf;
  10967. xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
  10968. /*
  10969. * plug some encoding conversion routines
  10970. */
  10971. if ((chunk != NULL) && (size >= 4))
  10972. enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
  10973. buf = xmlAllocParserInputBuffer(enc);
  10974. if (buf == NULL) return(NULL);
  10975. ctxt = xmlNewParserCtxt();
  10976. if (ctxt == NULL) {
  10977. xmlErrMemory(NULL, "creating parser: out of memory\n");
  10978. xmlFreeParserInputBuffer(buf);
  10979. return(NULL);
  10980. }
  10981. ctxt->dictNames = 1;
  10982. ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
  10983. if (ctxt->pushTab == NULL) {
  10984. xmlErrMemory(ctxt, NULL);
  10985. xmlFreeParserInputBuffer(buf);
  10986. xmlFreeParserCtxt(ctxt);
  10987. return(NULL);
  10988. }
  10989. if (sax != NULL) {
  10990. #ifdef LIBXML_SAX1_ENABLED
  10991. if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
  10992. #endif /* LIBXML_SAX1_ENABLED */
  10993. xmlFree(ctxt->sax);
  10994. ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
  10995. if (ctxt->sax == NULL) {
  10996. xmlErrMemory(ctxt, NULL);
  10997. xmlFreeParserInputBuffer(buf);
  10998. xmlFreeParserCtxt(ctxt);
  10999. return(NULL);
  11000. }
  11001. memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
  11002. if (sax->initialized == XML_SAX2_MAGIC)
  11003. memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
  11004. else
  11005. memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
  11006. if (user_data != NULL)
  11007. ctxt->userData = user_data;
  11008. }
  11009. if (filename == NULL) {
  11010. ctxt->directory = NULL;
  11011. } else {
  11012. ctxt->directory = xmlParserGetDirectory(filename);
  11013. }
  11014. inputStream = xmlNewInputStream(ctxt);
  11015. if (inputStream == NULL) {
  11016. xmlFreeParserCtxt(ctxt);
  11017. xmlFreeParserInputBuffer(buf);
  11018. return(NULL);
  11019. }
  11020. if (filename == NULL)
  11021. inputStream->filename = NULL;
  11022. else {
  11023. inputStream->filename = (char *)
  11024. xmlCanonicPath((const xmlChar *) filename);
  11025. if (inputStream->filename == NULL) {
  11026. xmlFreeParserCtxt(ctxt);
  11027. xmlFreeParserInputBuffer(buf);
  11028. return(NULL);
  11029. }
  11030. }
  11031. inputStream->buf = buf;
  11032. inputStream->base = inputStream->buf->buffer->content;
  11033. inputStream->cur = inputStream->buf->buffer->content;
  11034. inputStream->end =
  11035. &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
  11036. inputPush(ctxt, inputStream);
  11037. /*
  11038. * If the caller didn't provide an initial 'chunk' for determining
  11039. * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
  11040. * that it can be automatically determined later
  11041. */
  11042. if ((size == 0) || (chunk == NULL)) {
  11043. ctxt->charset = XML_CHAR_ENCODING_NONE;
  11044. } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
  11045. int base = ctxt->input->base - ctxt->input->buf->buffer->content;
  11046. int cur = ctxt->input->cur - ctxt->input->base;
  11047. xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
  11048. ctxt->input->base = ctxt->input->buf->buffer->content + base;
  11049. ctxt->input->cur = ctxt->input->base + cur;
  11050. ctxt->input->end =
  11051. &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
  11052. #ifdef DEBUG_PUSH
  11053. xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
  11054. #endif
  11055. }
  11056. if (enc != XML_CHAR_ENCODING_NONE) {
  11057. xmlSwitchEncoding(ctxt, enc);
  11058. }
  11059. return(ctxt);
  11060. }
  11061. #endif /* LIBXML_PUSH_ENABLED */
  11062. /**
  11063. * xmlStopParser:
  11064. * @ctxt: an XML parser context
  11065. *
  11066. * Blocks further parser processing
  11067. */
  11068. void
  11069. xmlStopParser(xmlParserCtxtPtr ctxt) {
  11070. if (ctxt == NULL)
  11071. return;
  11072. ctxt->instate = XML_PARSER_EOF;
  11073. ctxt->disableSAX = 1;
  11074. if (ctxt->input != NULL) {
  11075. ctxt->input->cur = BAD_CAST"";
  11076. ctxt->input->base = ctxt->input->cur;
  11077. }
  11078. }
  11079. /**
  11080. * xmlCreateIOParserCtxt:
  11081. * @sax: a SAX handler
  11082. * @user_data: The user data returned on SAX callbacks
  11083. * @ioread: an I/O read function
  11084. * @ioclose: an I/O close function
  11085. * @ioctx: an I/O handler
  11086. * @enc: the charset encoding if known
  11087. *
  11088. * Create a parser context for using the XML parser with an existing
  11089. * I/O stream
  11090. *
  11091. * Returns the new parser context or NULL
  11092. */
  11093. xmlParserCtxtPtr
  11094. xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
  11095. xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
  11096. void *ioctx, xmlCharEncoding enc) {
  11097. xmlParserCtxtPtr ctxt;
  11098. xmlParserInputPtr inputStream;
  11099. xmlParserInputBufferPtr buf;
  11100. if (ioread == NULL) return(NULL);
  11101. buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
  11102. if (buf == NULL) return(NULL);
  11103. ctxt = xmlNewParserCtxt();
  11104. if (ctxt == NULL) {
  11105. xmlFreeParserInputBuffer(buf);
  11106. return(NULL);
  11107. }
  11108. if (sax != NULL) {
  11109. #ifdef LIBXML_SAX1_ENABLED
  11110. if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
  11111. #endif /* LIBXML_SAX1_ENABLED */
  11112. xmlFree(ctxt->sax);
  11113. ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
  11114. if (ctxt->sax == NULL) {
  11115. xmlErrMemory(ctxt, NULL);
  11116. xmlFreeParserCtxt(ctxt);
  11117. return(NULL);
  11118. }
  11119. memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
  11120. if (sax->initialized == XML_SAX2_MAGIC)
  11121. memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
  11122. else
  11123. memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
  11124. if (user_data != NULL)
  11125. ctxt->userData = user_data;
  11126. }
  11127. inputStream = xmlNewIOInputStream(ctxt, buf, enc);
  11128. if (inputStream == NULL) {
  11129. xmlFreeParserCtxt(ctxt);
  11130. return(NULL);
  11131. }
  11132. inputPush(ctxt, inputStream);
  11133. return(ctxt);
  11134. }
  11135. #ifdef LIBXML_VALID_ENABLED
  11136. /************************************************************************
  11137. * *
  11138. * Front ends when parsing a DTD *
  11139. * *
  11140. ************************************************************************/
  11141. /**
  11142. * xmlIOParseDTD:
  11143. * @sax: the SAX handler block or NULL
  11144. * @input: an Input Buffer
  11145. * @enc: the charset encoding if known
  11146. *
  11147. * Load and parse a DTD
  11148. *
  11149. * Returns the resulting xmlDtdPtr or NULL in case of error.
  11150. * @input will be freed by the function in any case.
  11151. */
  11152. xmlDtdPtr
  11153. xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
  11154. xmlCharEncoding enc) {
  11155. xmlDtdPtr ret = NULL;
  11156. xmlParserCtxtPtr ctxt;
  11157. xmlParserInputPtr pinput = NULL;
  11158. xmlChar start[4];
  11159. if (input == NULL)
  11160. return(NULL);
  11161. ctxt = xmlNewParserCtxt();
  11162. if (ctxt == NULL) {
  11163. xmlFreeParserInputBuffer(input);
  11164. return(NULL);
  11165. }
  11166. /*
  11167. * Set-up the SAX context
  11168. */
  11169. if (sax != NULL) {
  11170. if (ctxt->sax != NULL)
  11171. xmlFree(ctxt->sax);
  11172. ctxt->sax = sax;
  11173. ctxt->userData = ctxt;
  11174. }
  11175. xmlDetectSAX2(ctxt);
  11176. /*
  11177. * generate a parser input from the I/O handler
  11178. */
  11179. pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  11180. if (pinput == NULL) {
  11181. if (sax != NULL) ctxt->sax = NULL;
  11182. xmlFreeParserInputBuffer(input);
  11183. xmlFreeParserCtxt(ctxt);
  11184. return(NULL);
  11185. }
  11186. /*
  11187. * plug some encoding conversion routines here.
  11188. */
  11189. if (xmlPushInput(ctxt, pinput) < 0) {
  11190. if (sax != NULL) ctxt->sax = NULL;
  11191. xmlFreeParserCtxt(ctxt);
  11192. return(NULL);
  11193. }
  11194. if (enc != XML_CHAR_ENCODING_NONE) {
  11195. xmlSwitchEncoding(ctxt, enc);
  11196. }
  11197. pinput->filename = NULL;
  11198. pinput->line = 1;
  11199. pinput->col = 1;
  11200. pinput->base = ctxt->input->cur;
  11201. pinput->cur = ctxt->input->cur;
  11202. pinput->free = NULL;
  11203. /*
  11204. * let's parse that entity knowing it's an external subset.
  11205. */
  11206. ctxt->inSubset = 2;
  11207. ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
  11208. if (ctxt->myDoc == NULL) {
  11209. xmlErrMemory(ctxt, "New Doc failed");
  11210. return(NULL);
  11211. }
  11212. ctxt->myDoc->properties = XML_DOC_INTERNAL;
  11213. ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
  11214. BAD_CAST "none", BAD_CAST "none");
  11215. if ((enc == XML_CHAR_ENCODING_NONE) &&
  11216. ((ctxt->input->end - ctxt->input->cur) >= 4)) {
  11217. /*
  11218. * Get the 4 first bytes and decode the charset
  11219. * if enc != XML_CHAR_ENCODING_NONE
  11220. * plug some encoding conversion routines.
  11221. */
  11222. start[0] = RAW;
  11223. start[1] = NXT(1);
  11224. start[2] = NXT(2);
  11225. start[3] = NXT(3);
  11226. enc = xmlDetectCharEncoding(start, 4);
  11227. if (enc != XML_CHAR_ENCODING_NONE) {
  11228. xmlSwitchEncoding(ctxt, enc);
  11229. }
  11230. }
  11231. xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
  11232. if (ctxt->myDoc != NULL) {
  11233. if (ctxt->wellFormed) {
  11234. ret = ctxt->myDoc->extSubset;
  11235. ctxt->myDoc->extSubset = NULL;
  11236. if (ret != NULL) {
  11237. xmlNodePtr tmp;
  11238. ret->doc = NULL;
  11239. tmp = ret->children;
  11240. while (tmp != NULL) {
  11241. tmp->doc = NULL;
  11242. tmp = tmp->next;
  11243. }
  11244. }
  11245. } else {
  11246. ret = NULL;
  11247. }
  11248. xmlFreeDoc(ctxt->myDoc);
  11249. ctxt->myDoc = NULL;
  11250. }
  11251. if (sax != NULL) ctxt->sax = NULL;
  11252. xmlFreeParserCtxt(ctxt);
  11253. return(ret);
  11254. }
  11255. /**
  11256. * xmlSAXParseDTD:
  11257. * @sax: the SAX handler block
  11258. * @ExternalID: a NAME* containing the External ID of the DTD
  11259. * @SystemID: a NAME* containing the URL to the DTD
  11260. *
  11261. * Load and parse an external subset.
  11262. *
  11263. * Returns the resulting xmlDtdPtr or NULL in case of error.
  11264. */
  11265. xmlDtdPtr
  11266. xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
  11267. const xmlChar *SystemID) {
  11268. xmlDtdPtr ret = NULL;
  11269. xmlParserCtxtPtr ctxt;
  11270. xmlParserInputPtr input = NULL;
  11271. xmlCharEncoding enc;
  11272. xmlChar* systemIdCanonic;
  11273. if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
  11274. ctxt = xmlNewParserCtxt();
  11275. if (ctxt == NULL) {
  11276. return(NULL);
  11277. }
  11278. /*
  11279. * Set-up the SAX context
  11280. */
  11281. if (sax != NULL) {
  11282. if (ctxt->sax != NULL)
  11283. xmlFree(ctxt->sax);
  11284. ctxt->sax = sax;
  11285. ctxt->userData = ctxt;
  11286. }
  11287. /*
  11288. * Canonicalise the system ID
  11289. */
  11290. systemIdCanonic = xmlCanonicPath(SystemID);
  11291. if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
  11292. xmlFreeParserCtxt(ctxt);
  11293. return(NULL);
  11294. }
  11295. /*
  11296. * Ask the Entity resolver to load the damn thing
  11297. */
  11298. if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
  11299. input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
  11300. systemIdCanonic);
  11301. if (input == NULL) {
  11302. if (sax != NULL) ctxt->sax = NULL;
  11303. xmlFreeParserCtxt(ctxt);
  11304. if (systemIdCanonic != NULL)
  11305. xmlFree(systemIdCanonic);
  11306. return(NULL);
  11307. }
  11308. /*
  11309. * plug some encoding conversion routines here.
  11310. */
  11311. if (xmlPushInput(ctxt, input) < 0) {
  11312. if (sax != NULL) ctxt->sax = NULL;
  11313. xmlFreeParserCtxt(ctxt);
  11314. if (systemIdCanonic != NULL)
  11315. xmlFree(systemIdCanonic);
  11316. return(NULL);
  11317. }
  11318. if ((ctxt->input->end - ctxt->input->cur) >= 4) {
  11319. enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
  11320. xmlSwitchEncoding(ctxt, enc);
  11321. }
  11322. if (input->filename == NULL)
  11323. input->filename = (char *) systemIdCanonic;
  11324. else
  11325. xmlFree(systemIdCanonic);
  11326. input->line = 1;
  11327. input->col = 1;
  11328. input->base = ctxt->input->cur;
  11329. input->cur = ctxt->input->cur;
  11330. input->free = NULL;
  11331. /*
  11332. * let's parse that entity knowing it's an external subset.
  11333. */
  11334. ctxt->inSubset = 2;
  11335. ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
  11336. if (ctxt->myDoc == NULL) {
  11337. xmlErrMemory(ctxt, "New Doc failed");
  11338. if (sax != NULL) ctxt->sax = NULL;
  11339. xmlFreeParserCtxt(ctxt);
  11340. return(NULL);
  11341. }
  11342. ctxt->myDoc->properties = XML_DOC_INTERNAL;
  11343. ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
  11344. ExternalID, SystemID);
  11345. xmlParseExternalSubset(ctxt, ExternalID, SystemID);
  11346. if (ctxt->myDoc != NULL) {
  11347. if (ctxt->wellFormed) {
  11348. ret = ctxt->myDoc->extSubset;
  11349. ctxt->myDoc->extSubset = NULL;
  11350. if (ret != NULL) {
  11351. xmlNodePtr tmp;
  11352. ret->doc = NULL;
  11353. tmp = ret->children;
  11354. while (tmp != NULL) {
  11355. tmp->doc = NULL;
  11356. tmp = tmp->next;
  11357. }
  11358. }
  11359. } else {
  11360. ret = NULL;
  11361. }
  11362. xmlFreeDoc(ctxt->myDoc);
  11363. ctxt->myDoc = NULL;
  11364. }
  11365. if (sax != NULL) ctxt->sax = NULL;
  11366. xmlFreeParserCtxt(ctxt);
  11367. return(ret);
  11368. }
  11369. /**
  11370. * xmlParseDTD:
  11371. * @ExternalID: a NAME* containing the External ID of the DTD
  11372. * @SystemID: a NAME* containing the URL to the DTD
  11373. *
  11374. * Load and parse an external subset.
  11375. *
  11376. * Returns the resulting xmlDtdPtr or NULL in case of error.
  11377. */
  11378. xmlDtdPtr
  11379. xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
  11380. return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
  11381. }
  11382. #endif /* LIBXML_VALID_ENABLED */
  11383. /************************************************************************
  11384. * *
  11385. * Front ends when parsing an Entity *
  11386. * *
  11387. ************************************************************************/
  11388. /**
  11389. * xmlParseCtxtExternalEntity:
  11390. * @ctx: the existing parsing context
  11391. * @URL: the URL for the entity to load
  11392. * @ID: the System ID for the entity to load
  11393. * @lst: the return value for the set of parsed nodes
  11394. *
  11395. * Parse an external general entity within an existing parsing context
  11396. * An external general parsed entity is well-formed if it matches the
  11397. * production labeled extParsedEnt.
  11398. *
  11399. * [78] extParsedEnt ::= TextDecl? content
  11400. *
  11401. * Returns 0 if the entity is well formed, -1 in case of args problem and
  11402. * the parser error code otherwise
  11403. */
  11404. int
  11405. xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
  11406. const xmlChar *ID, xmlNodePtr *lst) {
  11407. xmlParserCtxtPtr ctxt;
  11408. xmlDocPtr newDoc;
  11409. xmlNodePtr newRoot;
  11410. xmlSAXHandlerPtr oldsax = NULL;
  11411. int ret = 0;
  11412. xmlChar start[4];
  11413. xmlCharEncoding enc;
  11414. if (ctx == NULL) return(-1);
  11415. if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
  11416. (ctx->depth > 1024)) {
  11417. return(XML_ERR_ENTITY_LOOP);
  11418. }
  11419. if (lst != NULL)
  11420. *lst = NULL;
  11421. if ((URL == NULL) && (ID == NULL))
  11422. return(-1);
  11423. if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
  11424. return(-1);
  11425. ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
  11426. if (ctxt == NULL) {
  11427. return(-1);
  11428. }
  11429. oldsax = ctxt->sax;
  11430. ctxt->sax = ctx->sax;
  11431. xmlDetectSAX2(ctxt);
  11432. newDoc = xmlNewDoc(BAD_CAST "1.0");
  11433. if (newDoc == NULL) {
  11434. xmlFreeParserCtxt(ctxt);
  11435. return(-1);
  11436. }
  11437. newDoc->properties = XML_DOC_INTERNAL;
  11438. if (ctx->myDoc->dict) {
  11439. newDoc->dict = ctx->myDoc->dict;
  11440. xmlDictReference(newDoc->dict);
  11441. }
  11442. if (ctx->myDoc != NULL) {
  11443. newDoc->intSubset = ctx->myDoc->intSubset;
  11444. newDoc->extSubset = ctx->myDoc->extSubset;
  11445. }
  11446. if (ctx->myDoc->URL != NULL) {
  11447. newDoc->URL = xmlStrdup(ctx->myDoc->URL);
  11448. }
  11449. newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
  11450. if (newRoot == NULL) {
  11451. ctxt->sax = oldsax;
  11452. xmlFreeParserCtxt(ctxt);
  11453. newDoc->intSubset = NULL;
  11454. newDoc->extSubset = NULL;
  11455. xmlFreeDoc(newDoc);
  11456. return(-1);
  11457. }
  11458. xmlAddChild((xmlNodePtr) newDoc, newRoot);
  11459. nodePush(ctxt, newDoc->children);
  11460. if (ctx->myDoc == NULL) {
  11461. ctxt->myDoc = newDoc;
  11462. } else {
  11463. ctxt->myDoc = ctx->myDoc;
  11464. newDoc->children->doc = ctx->myDoc;
  11465. }
  11466. /*
  11467. * Get the 4 first bytes and decode the charset
  11468. * if enc != XML_CHAR_ENCODING_NONE
  11469. * plug some encoding conversion routines.
  11470. */
  11471. GROW
  11472. if ((ctxt->input->end - ctxt->input->cur) >= 4) {
  11473. start[0] = RAW;
  11474. start[1] = NXT(1);
  11475. start[2] = NXT(2);
  11476. start[3] = NXT(3);
  11477. enc = xmlDetectCharEncoding(start, 4);
  11478. if (enc != XML_CHAR_ENCODING_NONE) {
  11479. xmlSwitchEncoding(ctxt, enc);
  11480. }
  11481. }
  11482. /*
  11483. * Parse a possible text declaration first
  11484. */
  11485. if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
  11486. xmlParseTextDecl(ctxt);
  11487. /*
  11488. * An XML-1.0 document can't reference an entity not XML-1.0
  11489. */
  11490. if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
  11491. (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
  11492. xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
  11493. "Version mismatch between document and entity\n");
  11494. }
  11495. }
  11496. /*
  11497. * Doing validity checking on chunk doesn't make sense
  11498. */
  11499. ctxt->instate = XML_PARSER_CONTENT;
  11500. ctxt->validate = ctx->validate;
  11501. ctxt->valid = ctx->valid;
  11502. ctxt->loadsubset = ctx->loadsubset;
  11503. ctxt->depth = ctx->depth + 1;
  11504. ctxt->replaceEntities = ctx->replaceEntities;
  11505. if (ctxt->validate) {
  11506. ctxt->vctxt.error = ctx->vctxt.error;
  11507. ctxt->vctxt.warning = ctx->vctxt.warning;
  11508. } else {
  11509. ctxt->vctxt.error = NULL;
  11510. ctxt->vctxt.warning = NULL;
  11511. }
  11512. ctxt->vctxt.nodeTab = NULL;
  11513. ctxt->vctxt.nodeNr = 0;
  11514. ctxt->vctxt.nodeMax = 0;
  11515. ctxt->vctxt.node = NULL;
  11516. if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
  11517. ctxt->dict = ctx->dict;
  11518. ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
  11519. ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
  11520. ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
  11521. ctxt->dictNames = ctx->dictNames;
  11522. ctxt->attsDefault = ctx->attsDefault;
  11523. ctxt->attsSpecial = ctx->attsSpecial;
  11524. ctxt->linenumbers = ctx->linenumbers;
  11525. xmlParseContent(ctxt);
  11526. ctx->validate = ctxt->validate;
  11527. ctx->valid = ctxt->valid;
  11528. if ((RAW == '<') && (NXT(1) == '/')) {
  11529. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  11530. } else if (RAW != 0) {
  11531. xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
  11532. }
  11533. if (ctxt->node != newDoc->children) {
  11534. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  11535. }
  11536. if (!ctxt->wellFormed) {
  11537. if (ctxt->errNo == 0)
  11538. ret = 1;
  11539. else
  11540. ret = ctxt->errNo;
  11541. } else {
  11542. if (lst != NULL) {
  11543. xmlNodePtr cur;
  11544. /*
  11545. * Return the newly created nodeset after unlinking it from
  11546. * they pseudo parent.
  11547. */
  11548. cur = newDoc->children->children;
  11549. *lst = cur;
  11550. while (cur != NULL) {
  11551. cur->parent = NULL;
  11552. cur = cur->next;
  11553. }
  11554. newDoc->children->children = NULL;
  11555. }
  11556. ret = 0;
  11557. }
  11558. ctxt->sax = oldsax;
  11559. ctxt->dict = NULL;
  11560. ctxt->attsDefault = NULL;
  11561. ctxt->attsSpecial = NULL;
  11562. xmlFreeParserCtxt(ctxt);
  11563. newDoc->intSubset = NULL;
  11564. newDoc->extSubset = NULL;
  11565. xmlFreeDoc(newDoc);
  11566. return(ret);
  11567. }
  11568. /**
  11569. * xmlParseExternalEntityPrivate:
  11570. * @doc: the document the chunk pertains to
  11571. * @oldctxt: the previous parser context if available
  11572. * @sax: the SAX handler bloc (possibly NULL)
  11573. * @user_data: The user data returned on SAX callbacks (possibly NULL)
  11574. * @depth: Used for loop detection, use 0
  11575. * @URL: the URL for the entity to load
  11576. * @ID: the System ID for the entity to load
  11577. * @list: the return value for the set of parsed nodes
  11578. *
  11579. * Private version of xmlParseExternalEntity()
  11580. *
  11581. * Returns 0 if the entity is well formed, -1 in case of args problem and
  11582. * the parser error code otherwise
  11583. */
  11584. static xmlParserErrors
  11585. xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
  11586. xmlSAXHandlerPtr sax,
  11587. void *user_data, int depth, const xmlChar *URL,
  11588. const xmlChar *ID, xmlNodePtr *list) {
  11589. xmlParserCtxtPtr ctxt;
  11590. xmlDocPtr newDoc;
  11591. xmlNodePtr newRoot;
  11592. xmlSAXHandlerPtr oldsax = NULL;
  11593. xmlParserErrors ret = XML_ERR_OK;
  11594. xmlChar start[4];
  11595. xmlCharEncoding enc;
  11596. if (((depth > 40) &&
  11597. ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
  11598. (depth > 1024)) {
  11599. return(XML_ERR_ENTITY_LOOP);
  11600. }
  11601. if (list != NULL)
  11602. *list = NULL;
  11603. if ((URL == NULL) && (ID == NULL))
  11604. return(XML_ERR_INTERNAL_ERROR);
  11605. if (doc == NULL)
  11606. return(XML_ERR_INTERNAL_ERROR);
  11607. ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
  11608. if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
  11609. ctxt->userData = ctxt;
  11610. if (oldctxt != NULL) {
  11611. ctxt->_private = oldctxt->_private;
  11612. ctxt->loadsubset = oldctxt->loadsubset;
  11613. ctxt->validate = oldctxt->validate;
  11614. ctxt->external = oldctxt->external;
  11615. ctxt->record_info = oldctxt->record_info;
  11616. ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
  11617. ctxt->node_seq.length = oldctxt->node_seq.length;
  11618. ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
  11619. } else {
  11620. /*
  11621. * Doing validity checking on chunk without context
  11622. * doesn't make sense
  11623. */
  11624. ctxt->_private = NULL;
  11625. ctxt->validate = 0;
  11626. ctxt->external = 2;
  11627. ctxt->loadsubset = 0;
  11628. }
  11629. if (sax != NULL) {
  11630. oldsax = ctxt->sax;
  11631. ctxt->sax = sax;
  11632. if (user_data != NULL)
  11633. ctxt->userData = user_data;
  11634. }
  11635. xmlDetectSAX2(ctxt);
  11636. newDoc = xmlNewDoc(BAD_CAST "1.0");
  11637. if (newDoc == NULL) {
  11638. ctxt->node_seq.maximum = 0;
  11639. ctxt->node_seq.length = 0;
  11640. ctxt->node_seq.buffer = NULL;
  11641. xmlFreeParserCtxt(ctxt);
  11642. return(XML_ERR_INTERNAL_ERROR);
  11643. }
  11644. newDoc->properties = XML_DOC_INTERNAL;
  11645. newDoc->intSubset = doc->intSubset;
  11646. newDoc->extSubset = doc->extSubset;
  11647. newDoc->dict = doc->dict;
  11648. xmlDictReference(newDoc->dict);
  11649. if (doc->URL != NULL) {
  11650. newDoc->URL = xmlStrdup(doc->URL);
  11651. }
  11652. newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
  11653. if (newRoot == NULL) {
  11654. if (sax != NULL)
  11655. ctxt->sax = oldsax;
  11656. ctxt->node_seq.maximum = 0;
  11657. ctxt->node_seq.length = 0;
  11658. ctxt->node_seq.buffer = NULL;
  11659. xmlFreeParserCtxt(ctxt);
  11660. newDoc->intSubset = NULL;
  11661. newDoc->extSubset = NULL;
  11662. xmlFreeDoc(newDoc);
  11663. return(XML_ERR_INTERNAL_ERROR);
  11664. }
  11665. xmlAddChild((xmlNodePtr) newDoc, newRoot);
  11666. nodePush(ctxt, newDoc->children);
  11667. ctxt->myDoc = doc;
  11668. newRoot->doc = doc;
  11669. /*
  11670. * Get the 4 first bytes and decode the charset
  11671. * if enc != XML_CHAR_ENCODING_NONE
  11672. * plug some encoding conversion routines.
  11673. */
  11674. GROW;
  11675. if ((ctxt->input->end - ctxt->input->cur) >= 4) {
  11676. start[0] = RAW;
  11677. start[1] = NXT(1);
  11678. start[2] = NXT(2);
  11679. start[3] = NXT(3);
  11680. enc = xmlDetectCharEncoding(start, 4);
  11681. if (enc != XML_CHAR_ENCODING_NONE) {
  11682. xmlSwitchEncoding(ctxt, enc);
  11683. }
  11684. }
  11685. /*
  11686. * Parse a possible text declaration first
  11687. */
  11688. if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
  11689. xmlParseTextDecl(ctxt);
  11690. }
  11691. ctxt->instate = XML_PARSER_CONTENT;
  11692. ctxt->depth = depth;
  11693. xmlParseContent(ctxt);
  11694. if ((RAW == '<') && (NXT(1) == '/')) {
  11695. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  11696. } else if (RAW != 0) {
  11697. xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
  11698. }
  11699. if (ctxt->node != newDoc->children) {
  11700. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  11701. }
  11702. if (!ctxt->wellFormed) {
  11703. if (ctxt->errNo == 0)
  11704. ret = XML_ERR_INTERNAL_ERROR;
  11705. else
  11706. ret = (xmlParserErrors)ctxt->errNo;
  11707. } else {
  11708. if (list != NULL) {
  11709. xmlNodePtr cur;
  11710. /*
  11711. * Return the newly created nodeset after unlinking it from
  11712. * they pseudo parent.
  11713. */
  11714. cur = newDoc->children->children;
  11715. *list = cur;
  11716. while (cur != NULL) {
  11717. cur->parent = NULL;
  11718. cur = cur->next;
  11719. }
  11720. newDoc->children->children = NULL;
  11721. }
  11722. ret = XML_ERR_OK;
  11723. }
  11724. /*
  11725. * Record in the parent context the number of entities replacement
  11726. * done when parsing that reference.
  11727. */
  11728. if (oldctxt != NULL)
  11729. oldctxt->nbentities += ctxt->nbentities;
  11730. /*
  11731. * Also record the size of the entity parsed
  11732. */
  11733. if (ctxt->input != NULL) {
  11734. oldctxt->sizeentities += ctxt->input->consumed;
  11735. oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
  11736. }
  11737. /*
  11738. * And record the last error if any
  11739. */
  11740. if (ctxt->lastError.code != XML_ERR_OK)
  11741. xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
  11742. if (sax != NULL)
  11743. ctxt->sax = oldsax;
  11744. oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
  11745. oldctxt->node_seq.length = ctxt->node_seq.length;
  11746. oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
  11747. ctxt->node_seq.maximum = 0;
  11748. ctxt->node_seq.length = 0;
  11749. ctxt->node_seq.buffer = NULL;
  11750. xmlFreeParserCtxt(ctxt);
  11751. newDoc->intSubset = NULL;
  11752. newDoc->extSubset = NULL;
  11753. xmlFreeDoc(newDoc);
  11754. return(ret);
  11755. }
  11756. #ifdef LIBXML_SAX1_ENABLED
  11757. /**
  11758. * xmlParseExternalEntity:
  11759. * @doc: the document the chunk pertains to
  11760. * @sax: the SAX handler bloc (possibly NULL)
  11761. * @user_data: The user data returned on SAX callbacks (possibly NULL)
  11762. * @depth: Used for loop detection, use 0
  11763. * @URL: the URL for the entity to load
  11764. * @ID: the System ID for the entity to load
  11765. * @lst: the return value for the set of parsed nodes
  11766. *
  11767. * Parse an external general entity
  11768. * An external general parsed entity is well-formed if it matches the
  11769. * production labeled extParsedEnt.
  11770. *
  11771. * [78] extParsedEnt ::= TextDecl? content
  11772. *
  11773. * Returns 0 if the entity is well formed, -1 in case of args problem and
  11774. * the parser error code otherwise
  11775. */
  11776. int
  11777. xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
  11778. int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
  11779. return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
  11780. ID, lst));
  11781. }
  11782. /**
  11783. * xmlParseBalancedChunkMemory:
  11784. * @doc: the document the chunk pertains to
  11785. * @sax: the SAX handler bloc (possibly NULL)
  11786. * @user_data: The user data returned on SAX callbacks (possibly NULL)
  11787. * @depth: Used for loop detection, use 0
  11788. * @string: the input string in UTF8 or ISO-Latin (zero terminated)
  11789. * @lst: the return value for the set of parsed nodes
  11790. *
  11791. * Parse a well-balanced chunk of an XML document
  11792. * called by the parser
  11793. * The allowed sequence for the Well Balanced Chunk is the one defined by
  11794. * the content production in the XML grammar:
  11795. *
  11796. * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
  11797. *
  11798. * Returns 0 if the chunk is well balanced, -1 in case of args problem and
  11799. * the parser error code otherwise
  11800. */
  11801. int
  11802. xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
  11803. void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
  11804. return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
  11805. depth, string, lst, 0 );
  11806. }
  11807. #endif /* LIBXML_SAX1_ENABLED */
  11808. /**
  11809. * xmlParseBalancedChunkMemoryInternal:
  11810. * @oldctxt: the existing parsing context
  11811. * @string: the input string in UTF8 or ISO-Latin (zero terminated)
  11812. * @user_data: the user data field for the parser context
  11813. * @lst: the return value for the set of parsed nodes
  11814. *
  11815. *
  11816. * Parse a well-balanced chunk of an XML document
  11817. * called by the parser
  11818. * The allowed sequence for the Well Balanced Chunk is the one defined by
  11819. * the content production in the XML grammar:
  11820. *
  11821. * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
  11822. *
  11823. * Returns XML_ERR_OK if the chunk is well balanced, and the parser
  11824. * error code otherwise
  11825. *
  11826. * In case recover is set to 1, the nodelist will not be empty even if
  11827. * the parsed chunk is not well balanced.
  11828. */
  11829. static xmlParserErrors
  11830. xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
  11831. const xmlChar *string, void *user_data, xmlNodePtr *lst) {
  11832. xmlParserCtxtPtr ctxt;
  11833. xmlDocPtr newDoc = NULL;
  11834. xmlNodePtr newRoot;
  11835. xmlSAXHandlerPtr oldsax = NULL;
  11836. xmlNodePtr content = NULL;
  11837. xmlNodePtr last = NULL;
  11838. int size;
  11839. xmlParserErrors ret = XML_ERR_OK;
  11840. #ifdef SAX2
  11841. int i;
  11842. #endif
  11843. if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
  11844. (oldctxt->depth > 1024)) {
  11845. return(XML_ERR_ENTITY_LOOP);
  11846. }
  11847. if (lst != NULL)
  11848. *lst = NULL;
  11849. if (string == NULL)
  11850. return(XML_ERR_INTERNAL_ERROR);
  11851. size = xmlStrlen(string);
  11852. ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
  11853. if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
  11854. if (user_data != NULL)
  11855. ctxt->userData = user_data;
  11856. else
  11857. ctxt->userData = ctxt;
  11858. if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
  11859. ctxt->dict = oldctxt->dict;
  11860. ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
  11861. ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
  11862. ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
  11863. #ifdef SAX2
  11864. /* propagate namespaces down the entity */
  11865. for (i = 0;i < oldctxt->nsNr;i += 2) {
  11866. nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
  11867. }
  11868. #endif
  11869. oldsax = ctxt->sax;
  11870. ctxt->sax = oldctxt->sax;
  11871. xmlDetectSAX2(ctxt);
  11872. ctxt->replaceEntities = oldctxt->replaceEntities;
  11873. ctxt->options = oldctxt->options;
  11874. ctxt->_private = oldctxt->_private;
  11875. if (oldctxt->myDoc == NULL) {
  11876. newDoc = xmlNewDoc(BAD_CAST "1.0");
  11877. if (newDoc == NULL) {
  11878. ctxt->sax = oldsax;
  11879. ctxt->dict = NULL;
  11880. xmlFreeParserCtxt(ctxt);
  11881. return(XML_ERR_INTERNAL_ERROR);
  11882. }
  11883. newDoc->properties = XML_DOC_INTERNAL;
  11884. newDoc->dict = ctxt->dict;
  11885. xmlDictReference(newDoc->dict);
  11886. ctxt->myDoc = newDoc;
  11887. } else {
  11888. ctxt->myDoc = oldctxt->myDoc;
  11889. content = ctxt->myDoc->children;
  11890. last = ctxt->myDoc->last;
  11891. }
  11892. newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
  11893. if (newRoot == NULL) {
  11894. ctxt->sax = oldsax;
  11895. ctxt->dict = NULL;
  11896. xmlFreeParserCtxt(ctxt);
  11897. if (newDoc != NULL) {
  11898. xmlFreeDoc(newDoc);
  11899. }
  11900. return(XML_ERR_INTERNAL_ERROR);
  11901. }
  11902. ctxt->myDoc->children = NULL;
  11903. ctxt->myDoc->last = NULL;
  11904. xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
  11905. nodePush(ctxt, ctxt->myDoc->children);
  11906. ctxt->instate = XML_PARSER_CONTENT;
  11907. ctxt->depth = oldctxt->depth + 1;
  11908. ctxt->validate = 0;
  11909. ctxt->loadsubset = oldctxt->loadsubset;
  11910. if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
  11911. /*
  11912. * ID/IDREF registration will be done in xmlValidateElement below
  11913. */
  11914. ctxt->loadsubset |= XML_SKIP_IDS;
  11915. }
  11916. ctxt->dictNames = oldctxt->dictNames;
  11917. ctxt->attsDefault = oldctxt->attsDefault;
  11918. ctxt->attsSpecial = oldctxt->attsSpecial;
  11919. xmlParseContent(ctxt);
  11920. if ((RAW == '<') && (NXT(1) == '/')) {
  11921. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  11922. } else if (RAW != 0) {
  11923. xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
  11924. }
  11925. if (ctxt->node != ctxt->myDoc->children) {
  11926. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  11927. }
  11928. if (!ctxt->wellFormed) {
  11929. if (ctxt->errNo == 0)
  11930. ret = XML_ERR_INTERNAL_ERROR;
  11931. else
  11932. ret = (xmlParserErrors)ctxt->errNo;
  11933. } else {
  11934. ret = XML_ERR_OK;
  11935. }
  11936. if ((lst != NULL) && (ret == XML_ERR_OK)) {
  11937. xmlNodePtr cur;
  11938. /*
  11939. * Return the newly created nodeset after unlinking it from
  11940. * they pseudo parent.
  11941. */
  11942. cur = ctxt->myDoc->children->children;
  11943. *lst = cur;
  11944. while (cur != NULL) {
  11945. #ifdef LIBXML_VALID_ENABLED
  11946. if ((oldctxt->validate) && (oldctxt->wellFormed) &&
  11947. (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
  11948. (cur->type == XML_ELEMENT_NODE)) {
  11949. oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
  11950. oldctxt->myDoc, cur);
  11951. }
  11952. #endif /* LIBXML_VALID_ENABLED */
  11953. cur->parent = NULL;
  11954. cur = cur->next;
  11955. }
  11956. ctxt->myDoc->children->children = NULL;
  11957. }
  11958. if (ctxt->myDoc != NULL) {
  11959. xmlFreeNode(ctxt->myDoc->children);
  11960. ctxt->myDoc->children = content;
  11961. ctxt->myDoc->last = last;
  11962. }
  11963. /*
  11964. * Record in the parent context the number of entities replacement
  11965. * done when parsing that reference.
  11966. */
  11967. if (oldctxt != NULL)
  11968. oldctxt->nbentities += ctxt->nbentities;
  11969. /*
  11970. * Also record the last error if any
  11971. */
  11972. if (ctxt->lastError.code != XML_ERR_OK)
  11973. xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
  11974. ctxt->sax = oldsax;
  11975. ctxt->dict = NULL;
  11976. ctxt->attsDefault = NULL;
  11977. ctxt->attsSpecial = NULL;
  11978. xmlFreeParserCtxt(ctxt);
  11979. if (newDoc != NULL) {
  11980. xmlFreeDoc(newDoc);
  11981. }
  11982. return(ret);
  11983. }
  11984. /**
  11985. * xmlParseInNodeContext:
  11986. * @node: the context node
  11987. * @data: the input string
  11988. * @datalen: the input string length in bytes
  11989. * @options: a combination of xmlParserOption
  11990. * @lst: the return value for the set of parsed nodes
  11991. *
  11992. * Parse a well-balanced chunk of an XML document
  11993. * within the context (DTD, namespaces, etc ...) of the given node.
  11994. *
  11995. * The allowed sequence for the data is a Well Balanced Chunk defined by
  11996. * the content production in the XML grammar:
  11997. *
  11998. * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
  11999. *
  12000. * Returns XML_ERR_OK if the chunk is well balanced, and the parser
  12001. * error code otherwise
  12002. */
  12003. xmlParserErrors
  12004. xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
  12005. int options, xmlNodePtr *lst) {
  12006. #ifdef SAX2
  12007. xmlParserCtxtPtr ctxt;
  12008. xmlDocPtr doc = NULL;
  12009. xmlNodePtr fake, cur;
  12010. int nsnr = 0;
  12011. xmlParserErrors ret = XML_ERR_OK;
  12012. /*
  12013. * check all input parameters, grab the document
  12014. */
  12015. if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
  12016. return(XML_ERR_INTERNAL_ERROR);
  12017. switch (node->type) {
  12018. case XML_ELEMENT_NODE:
  12019. case XML_ATTRIBUTE_NODE:
  12020. case XML_TEXT_NODE:
  12021. case XML_CDATA_SECTION_NODE:
  12022. case XML_ENTITY_REF_NODE:
  12023. case XML_PI_NODE:
  12024. case XML_COMMENT_NODE:
  12025. case XML_DOCUMENT_NODE:
  12026. case XML_HTML_DOCUMENT_NODE:
  12027. break;
  12028. default:
  12029. return(XML_ERR_INTERNAL_ERROR);
  12030. }
  12031. while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
  12032. (node->type != XML_DOCUMENT_NODE) &&
  12033. (node->type != XML_HTML_DOCUMENT_NODE))
  12034. node = node->parent;
  12035. if (node == NULL)
  12036. return(XML_ERR_INTERNAL_ERROR);
  12037. if (node->type == XML_ELEMENT_NODE)
  12038. doc = node->doc;
  12039. else
  12040. doc = (xmlDocPtr) node;
  12041. if (doc == NULL)
  12042. return(XML_ERR_INTERNAL_ERROR);
  12043. /*
  12044. * allocate a context and set-up everything not related to the
  12045. * node position in the tree
  12046. */
  12047. if (doc->type == XML_DOCUMENT_NODE)
  12048. ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
  12049. #ifdef LIBXML_HTML_ENABLED
  12050. else if (doc->type == XML_HTML_DOCUMENT_NODE)
  12051. ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
  12052. #endif
  12053. else
  12054. return(XML_ERR_INTERNAL_ERROR);
  12055. if (ctxt == NULL)
  12056. return(XML_ERR_NO_MEMORY);
  12057. fake = xmlNewComment(NULL);
  12058. if (fake == NULL) {
  12059. xmlFreeParserCtxt(ctxt);
  12060. return(XML_ERR_NO_MEMORY);
  12061. }
  12062. xmlAddChild(node, fake);
  12063. /*
  12064. * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
  12065. * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
  12066. * we must wait until the last moment to free the original one.
  12067. */
  12068. if (doc->dict != NULL) {
  12069. if (ctxt->dict != NULL)
  12070. xmlDictFree(ctxt->dict);
  12071. ctxt->dict = doc->dict;
  12072. } else
  12073. options |= XML_PARSE_NODICT;
  12074. xmlCtxtUseOptionsInternal(ctxt, options, NULL);
  12075. xmlDetectSAX2(ctxt);
  12076. ctxt->myDoc = doc;
  12077. if (node->type == XML_ELEMENT_NODE) {
  12078. nodePush(ctxt, node);
  12079. /*
  12080. * initialize the SAX2 namespaces stack
  12081. */
  12082. cur = node;
  12083. while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
  12084. xmlNsPtr ns = cur->nsDef;
  12085. const xmlChar *iprefix, *ihref;
  12086. while (ns != NULL) {
  12087. if (ctxt->dict) {
  12088. iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
  12089. ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
  12090. } else {
  12091. iprefix = ns->prefix;
  12092. ihref = ns->href;
  12093. }
  12094. if (xmlGetNamespace(ctxt, iprefix) == NULL) {
  12095. nsPush(ctxt, iprefix, ihref);
  12096. nsnr++;
  12097. }
  12098. ns = ns->next;
  12099. }
  12100. cur = cur->parent;
  12101. }
  12102. ctxt->instate = XML_PARSER_CONTENT;
  12103. }
  12104. if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
  12105. /*
  12106. * ID/IDREF registration will be done in xmlValidateElement below
  12107. */
  12108. ctxt->loadsubset |= XML_SKIP_IDS;
  12109. }
  12110. #ifdef LIBXML_HTML_ENABLED
  12111. if (doc->type == XML_HTML_DOCUMENT_NODE)
  12112. __htmlParseContent(ctxt);
  12113. else
  12114. #endif
  12115. xmlParseContent(ctxt);
  12116. nsPop(ctxt, nsnr);
  12117. if ((RAW == '<') && (NXT(1) == '/')) {
  12118. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12119. } else if (RAW != 0) {
  12120. xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
  12121. }
  12122. if ((ctxt->node != NULL) && (ctxt->node != node)) {
  12123. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12124. ctxt->wellFormed = 0;
  12125. }
  12126. if (!ctxt->wellFormed) {
  12127. if (ctxt->errNo == 0)
  12128. ret = XML_ERR_INTERNAL_ERROR;
  12129. else
  12130. ret = (xmlParserErrors)ctxt->errNo;
  12131. } else {
  12132. ret = XML_ERR_OK;
  12133. }
  12134. /*
  12135. * Return the newly created nodeset after unlinking it from
  12136. * the pseudo sibling.
  12137. */
  12138. cur = fake->next;
  12139. fake->next = NULL;
  12140. node->last = fake;
  12141. if (cur != NULL) {
  12142. cur->prev = NULL;
  12143. }
  12144. *lst = cur;
  12145. while (cur != NULL) {
  12146. cur->parent = NULL;
  12147. cur = cur->next;
  12148. }
  12149. xmlUnlinkNode(fake);
  12150. xmlFreeNode(fake);
  12151. if (ret != XML_ERR_OK) {
  12152. xmlFreeNodeList(*lst);
  12153. *lst = NULL;
  12154. }
  12155. if (doc->dict != NULL)
  12156. ctxt->dict = NULL;
  12157. xmlFreeParserCtxt(ctxt);
  12158. return(ret);
  12159. #else /* !SAX2 */
  12160. return(XML_ERR_INTERNAL_ERROR);
  12161. #endif
  12162. }
  12163. #ifdef LIBXML_SAX1_ENABLED
  12164. /**
  12165. * xmlParseBalancedChunkMemoryRecover:
  12166. * @doc: the document the chunk pertains to
  12167. * @sax: the SAX handler bloc (possibly NULL)
  12168. * @user_data: The user data returned on SAX callbacks (possibly NULL)
  12169. * @depth: Used for loop detection, use 0
  12170. * @string: the input string in UTF8 or ISO-Latin (zero terminated)
  12171. * @lst: the return value for the set of parsed nodes
  12172. * @recover: return nodes even if the data is broken (use 0)
  12173. *
  12174. *
  12175. * Parse a well-balanced chunk of an XML document
  12176. * called by the parser
  12177. * The allowed sequence for the Well Balanced Chunk is the one defined by
  12178. * the content production in the XML grammar:
  12179. *
  12180. * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
  12181. *
  12182. * Returns 0 if the chunk is well balanced, -1 in case of args problem and
  12183. * the parser error code otherwise
  12184. *
  12185. * In case recover is set to 1, the nodelist will not be empty even if
  12186. * the parsed chunk is not well balanced, assuming the parsing succeeded to
  12187. * some extent.
  12188. */
  12189. int
  12190. xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
  12191. void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
  12192. int recover) {
  12193. xmlParserCtxtPtr ctxt;
  12194. xmlDocPtr newDoc;
  12195. xmlSAXHandlerPtr oldsax = NULL;
  12196. xmlNodePtr content, newRoot;
  12197. int size;
  12198. int ret = 0;
  12199. if (depth > 40) {
  12200. return(XML_ERR_ENTITY_LOOP);
  12201. }
  12202. if (lst != NULL)
  12203. *lst = NULL;
  12204. if (string == NULL)
  12205. return(-1);
  12206. size = xmlStrlen(string);
  12207. ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
  12208. if (ctxt == NULL) return(-1);
  12209. ctxt->userData = ctxt;
  12210. if (sax != NULL) {
  12211. oldsax = ctxt->sax;
  12212. ctxt->sax = sax;
  12213. if (user_data != NULL)
  12214. ctxt->userData = user_data;
  12215. }
  12216. newDoc = xmlNewDoc(BAD_CAST "1.0");
  12217. if (newDoc == NULL) {
  12218. xmlFreeParserCtxt(ctxt);
  12219. return(-1);
  12220. }
  12221. newDoc->properties = XML_DOC_INTERNAL;
  12222. if ((doc != NULL) && (doc->dict != NULL)) {
  12223. xmlDictFree(ctxt->dict);
  12224. ctxt->dict = doc->dict;
  12225. xmlDictReference(ctxt->dict);
  12226. ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
  12227. ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
  12228. ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
  12229. ctxt->dictNames = 1;
  12230. } else {
  12231. xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
  12232. }
  12233. if (doc != NULL) {
  12234. newDoc->intSubset = doc->intSubset;
  12235. newDoc->extSubset = doc->extSubset;
  12236. }
  12237. newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
  12238. if (newRoot == NULL) {
  12239. if (sax != NULL)
  12240. ctxt->sax = oldsax;
  12241. xmlFreeParserCtxt(ctxt);
  12242. newDoc->intSubset = NULL;
  12243. newDoc->extSubset = NULL;
  12244. xmlFreeDoc(newDoc);
  12245. return(-1);
  12246. }
  12247. xmlAddChild((xmlNodePtr) newDoc, newRoot);
  12248. nodePush(ctxt, newRoot);
  12249. if (doc == NULL) {
  12250. ctxt->myDoc = newDoc;
  12251. } else {
  12252. ctxt->myDoc = newDoc;
  12253. newDoc->children->doc = doc;
  12254. /* Ensure that doc has XML spec namespace */
  12255. xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
  12256. newDoc->oldNs = doc->oldNs;
  12257. }
  12258. ctxt->instate = XML_PARSER_CONTENT;
  12259. ctxt->depth = depth;
  12260. /*
  12261. * Doing validity checking on chunk doesn't make sense
  12262. */
  12263. ctxt->validate = 0;
  12264. ctxt->loadsubset = 0;
  12265. xmlDetectSAX2(ctxt);
  12266. if ( doc != NULL ){
  12267. content = doc->children;
  12268. doc->children = NULL;
  12269. xmlParseContent(ctxt);
  12270. doc->children = content;
  12271. }
  12272. else {
  12273. xmlParseContent(ctxt);
  12274. }
  12275. if ((RAW == '<') && (NXT(1) == '/')) {
  12276. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12277. } else if (RAW != 0) {
  12278. xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
  12279. }
  12280. if (ctxt->node != newDoc->children) {
  12281. xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
  12282. }
  12283. if (!ctxt->wellFormed) {
  12284. if (ctxt->errNo == 0)
  12285. ret = 1;
  12286. else
  12287. ret = ctxt->errNo;
  12288. } else {
  12289. ret = 0;
  12290. }
  12291. if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
  12292. xmlNodePtr cur;
  12293. /*
  12294. * Return the newly created nodeset after unlinking it from
  12295. * they pseudo parent.
  12296. */
  12297. cur = newDoc->children->children;
  12298. *lst = cur;
  12299. while (cur != NULL) {
  12300. xmlSetTreeDoc(cur, doc);
  12301. cur->parent = NULL;
  12302. cur = cur->next;
  12303. }
  12304. newDoc->children->children = NULL;
  12305. }
  12306. if (sax != NULL)
  12307. ctxt->sax = oldsax;
  12308. xmlFreeParserCtxt(ctxt);
  12309. newDoc->intSubset = NULL;
  12310. newDoc->extSubset = NULL;
  12311. newDoc->oldNs = NULL;
  12312. xmlFreeDoc(newDoc);
  12313. return(ret);
  12314. }
  12315. /**
  12316. * xmlSAXParseEntity:
  12317. * @sax: the SAX handler block
  12318. * @filename: the filename
  12319. *
  12320. * parse an XML external entity out of context and build a tree.
  12321. * It use the given SAX function block to handle the parsing callback.
  12322. * If sax is NULL, fallback to the default DOM tree building routines.
  12323. *
  12324. * [78] extParsedEnt ::= TextDecl? content
  12325. *
  12326. * This correspond to a "Well Balanced" chunk
  12327. *
  12328. * Returns the resulting document tree
  12329. */
  12330. xmlDocPtr
  12331. xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
  12332. xmlDocPtr ret;
  12333. xmlParserCtxtPtr ctxt;
  12334. ctxt = xmlCreateFileParserCtxt(filename);
  12335. if (ctxt == NULL) {
  12336. return(NULL);
  12337. }
  12338. if (sax != NULL) {
  12339. if (ctxt->sax != NULL)
  12340. xmlFree(ctxt->sax);
  12341. ctxt->sax = sax;
  12342. ctxt->userData = NULL;
  12343. }
  12344. xmlParseExtParsedEnt(ctxt);
  12345. if (ctxt->wellFormed)
  12346. ret = ctxt->myDoc;
  12347. else {
  12348. ret = NULL;
  12349. xmlFreeDoc(ctxt->myDoc);
  12350. ctxt->myDoc = NULL;
  12351. }
  12352. if (sax != NULL)
  12353. ctxt->sax = NULL;
  12354. xmlFreeParserCtxt(ctxt);
  12355. return(ret);
  12356. }
  12357. /**
  12358. * xmlParseEntity:
  12359. * @filename: the filename
  12360. *
  12361. * parse an XML external entity out of context and build a tree.
  12362. *
  12363. * [78] extParsedEnt ::= TextDecl? content
  12364. *
  12365. * This correspond to a "Well Balanced" chunk
  12366. *
  12367. * Returns the resulting document tree
  12368. */
  12369. xmlDocPtr
  12370. xmlParseEntity(const char *filename) {
  12371. return(xmlSAXParseEntity(NULL, filename));
  12372. }
  12373. #endif /* LIBXML_SAX1_ENABLED */
  12374. /**
  12375. * xmlCreateEntityParserCtxtInternal:
  12376. * @URL: the entity URL
  12377. * @ID: the entity PUBLIC ID
  12378. * @base: a possible base for the target URI
  12379. * @pctx: parser context used to set options on new context
  12380. *
  12381. * Create a parser context for an external entity
  12382. * Automatic support for ZLIB/Compress compressed document is provided
  12383. * by default if found at compile-time.
  12384. *
  12385. * Returns the new parser context or NULL
  12386. */
  12387. static xmlParserCtxtPtr
  12388. xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
  12389. const xmlChar *base, xmlParserCtxtPtr pctx) {
  12390. xmlParserCtxtPtr ctxt;
  12391. xmlParserInputPtr inputStream;
  12392. char *directory = NULL;
  12393. xmlChar *uri;
  12394. ctxt = xmlNewParserCtxt();
  12395. if (ctxt == NULL) {
  12396. return(NULL);
  12397. }
  12398. if (pctx != NULL) {
  12399. ctxt->options = pctx->options;
  12400. ctxt->_private = pctx->_private;
  12401. }
  12402. uri = xmlBuildURI(URL, base);
  12403. if (uri == NULL) {
  12404. inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
  12405. if (inputStream == NULL) {
  12406. xmlFreeParserCtxt(ctxt);
  12407. return(NULL);
  12408. }
  12409. inputPush(ctxt, inputStream);
  12410. if ((ctxt->directory == NULL) && (directory == NULL))
  12411. directory = xmlParserGetDirectory((char *)URL);
  12412. if ((ctxt->directory == NULL) && (directory != NULL))
  12413. ctxt->directory = directory;
  12414. } else {
  12415. inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
  12416. if (inputStream == NULL) {
  12417. xmlFree(uri);
  12418. xmlFreeParserCtxt(ctxt);
  12419. return(NULL);
  12420. }
  12421. inputPush(ctxt, inputStream);
  12422. if ((ctxt->directory == NULL) && (directory == NULL))
  12423. directory = xmlParserGetDirectory((char *)uri);
  12424. if ((ctxt->directory == NULL) && (directory != NULL))
  12425. ctxt->directory = directory;
  12426. xmlFree(uri);
  12427. }
  12428. return(ctxt);
  12429. }
  12430. /**
  12431. * xmlCreateEntityParserCtxt:
  12432. * @URL: the entity URL
  12433. * @ID: the entity PUBLIC ID
  12434. * @base: a possible base for the target URI
  12435. *
  12436. * Create a parser context for an external entity
  12437. * Automatic support for ZLIB/Compress compressed document is provided
  12438. * by default if found at compile-time.
  12439. *
  12440. * Returns the new parser context or NULL
  12441. */
  12442. xmlParserCtxtPtr
  12443. xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
  12444. const xmlChar *base) {
  12445. return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
  12446. }
  12447. /************************************************************************
  12448. * *
  12449. * Front ends when parsing from a file *
  12450. * *
  12451. ************************************************************************/
  12452. /**
  12453. * xmlCreateURLParserCtxt:
  12454. * @filename: the filename or URL
  12455. * @options: a combination of xmlParserOption
  12456. *
  12457. * Create a parser context for a file or URL content.
  12458. * Automatic support for ZLIB/Compress compressed document is provided
  12459. * by default if found at compile-time and for file accesses
  12460. *
  12461. * Returns the new parser context or NULL
  12462. */
  12463. xmlParserCtxtPtr
  12464. xmlCreateURLParserCtxt(const char *filename, int options)
  12465. {
  12466. xmlParserCtxtPtr ctxt;
  12467. xmlParserInputPtr inputStream;
  12468. char *directory = NULL;
  12469. ctxt = xmlNewParserCtxt();
  12470. if (ctxt == NULL) {
  12471. xmlErrMemory(NULL, "cannot allocate parser context");
  12472. return(NULL);
  12473. }
  12474. if (options)
  12475. xmlCtxtUseOptionsInternal(ctxt, options, NULL);
  12476. ctxt->linenumbers = 1;
  12477. inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
  12478. if (inputStream == NULL) {
  12479. xmlFreeParserCtxt(ctxt);
  12480. return(NULL);
  12481. }
  12482. inputPush(ctxt, inputStream);
  12483. if ((ctxt->directory == NULL) && (directory == NULL))
  12484. directory = xmlParserGetDirectory(filename);
  12485. if ((ctxt->directory == NULL) && (directory != NULL))
  12486. ctxt->directory = directory;
  12487. return(ctxt);
  12488. }
  12489. /**
  12490. * xmlCreateFileParserCtxt:
  12491. * @filename: the filename
  12492. *
  12493. * Create a parser context for a file content.
  12494. * Automatic support for ZLIB/Compress compressed document is provided
  12495. * by default if found at compile-time.
  12496. *
  12497. * Returns the new parser context or NULL
  12498. */
  12499. xmlParserCtxtPtr
  12500. xmlCreateFileParserCtxt(const char *filename)
  12501. {
  12502. return(xmlCreateURLParserCtxt(filename, 0));
  12503. }
  12504. #ifdef LIBXML_SAX1_ENABLED
  12505. /**
  12506. * xmlSAXParseFileWithData:
  12507. * @sax: the SAX handler block
  12508. * @filename: the filename
  12509. * @recovery: work in recovery mode, i.e. tries to read no Well Formed
  12510. * documents
  12511. * @data: the userdata
  12512. *
  12513. * parse an XML file and build a tree. Automatic support for ZLIB/Compress
  12514. * compressed document is provided by default if found at compile-time.
  12515. * It use the given SAX function block to handle the parsing callback.
  12516. * If sax is NULL, fallback to the default DOM tree building routines.
  12517. *
  12518. * User data (void *) is stored within the parser context in the
  12519. * context's _private member, so it is available nearly everywhere in libxml
  12520. *
  12521. * Returns the resulting document tree
  12522. */
  12523. xmlDocPtr
  12524. xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
  12525. int recovery, void *data) {
  12526. xmlDocPtr ret;
  12527. xmlParserCtxtPtr ctxt;
  12528. xmlInitParser();
  12529. ctxt = xmlCreateFileParserCtxt(filename);
  12530. if (ctxt == NULL) {
  12531. return(NULL);
  12532. }
  12533. if (sax != NULL) {
  12534. if (ctxt->sax != NULL)
  12535. xmlFree(ctxt->sax);
  12536. ctxt->sax = sax;
  12537. }
  12538. xmlDetectSAX2(ctxt);
  12539. if (data!=NULL) {
  12540. ctxt->_private = data;
  12541. }
  12542. if (ctxt->directory == NULL)
  12543. ctxt->directory = xmlParserGetDirectory(filename);
  12544. ctxt->recovery = recovery;
  12545. xmlParseDocument(ctxt);
  12546. if ((ctxt->wellFormed) || recovery) {
  12547. ret = ctxt->myDoc;
  12548. if (ret != NULL) {
  12549. if (ctxt->input->buf->compressed > 0)
  12550. ret->compression = 9;
  12551. else
  12552. ret->compression = ctxt->input->buf->compressed;
  12553. }
  12554. }
  12555. else {
  12556. ret = NULL;
  12557. xmlFreeDoc(ctxt->myDoc);
  12558. ctxt->myDoc = NULL;
  12559. }
  12560. if (sax != NULL)
  12561. ctxt->sax = NULL;
  12562. xmlFreeParserCtxt(ctxt);
  12563. return(ret);
  12564. }
  12565. /**
  12566. * xmlSAXParseFile:
  12567. * @sax: the SAX handler block
  12568. * @filename: the filename
  12569. * @recovery: work in recovery mode, i.e. tries to read no Well Formed
  12570. * documents
  12571. *
  12572. * parse an XML file and build a tree. Automatic support for ZLIB/Compress
  12573. * compressed document is provided by default if found at compile-time.
  12574. * It use the given SAX function block to handle the parsing callback.
  12575. * If sax is NULL, fallback to the default DOM tree building routines.
  12576. *
  12577. * Returns the resulting document tree
  12578. */
  12579. xmlDocPtr
  12580. xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
  12581. int recovery) {
  12582. return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
  12583. }
  12584. /**
  12585. * xmlRecoverDoc:
  12586. * @cur: a pointer to an array of xmlChar
  12587. *
  12588. * parse an XML in-memory document and build a tree.
  12589. * In the case the document is not Well Formed, a attempt to build a
  12590. * tree is tried anyway
  12591. *
  12592. * Returns the resulting document tree or NULL in case of failure
  12593. */
  12594. xmlDocPtr
  12595. xmlRecoverDoc(const xmlChar *cur) {
  12596. return(xmlSAXParseDoc(NULL, cur, 1));
  12597. }
  12598. /**
  12599. * xmlParseFile:
  12600. * @filename: the filename
  12601. *
  12602. * parse an XML file and build a tree. Automatic support for ZLIB/Compress
  12603. * compressed document is provided by default if found at compile-time.
  12604. *
  12605. * Returns the resulting document tree if the file was wellformed,
  12606. * NULL otherwise.
  12607. */
  12608. xmlDocPtr
  12609. xmlParseFile(const char *filename) {
  12610. return(xmlSAXParseFile(NULL, filename, 0));
  12611. }
  12612. /**
  12613. * xmlRecoverFile:
  12614. * @filename: the filename
  12615. *
  12616. * parse an XML file and build a tree. Automatic support for ZLIB/Compress
  12617. * compressed document is provided by default if found at compile-time.
  12618. * In the case the document is not Well Formed, it attempts to build
  12619. * a tree anyway
  12620. *
  12621. * Returns the resulting document tree or NULL in case of failure
  12622. */
  12623. xmlDocPtr
  12624. xmlRecoverFile(const char *filename) {
  12625. return(xmlSAXParseFile(NULL, filename, 1));
  12626. }
  12627. /**
  12628. * xmlSetupParserForBuffer:
  12629. * @ctxt: an XML parser context
  12630. * @buffer: a xmlChar * buffer
  12631. * @filename: a file name
  12632. *
  12633. * Setup the parser context to parse a new buffer; Clears any prior
  12634. * contents from the parser context. The buffer parameter must not be
  12635. * NULL, but the filename parameter can be
  12636. */
  12637. void
  12638. xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
  12639. const char* filename)
  12640. {
  12641. xmlParserInputPtr input;
  12642. if ((ctxt == NULL) || (buffer == NULL))
  12643. return;
  12644. input = xmlNewInputStream(ctxt);
  12645. if (input == NULL) {
  12646. xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
  12647. xmlClearParserCtxt(ctxt);
  12648. return;
  12649. }
  12650. xmlClearParserCtxt(ctxt);
  12651. if (filename != NULL)
  12652. input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
  12653. input->base = buffer;
  12654. input->cur = buffer;
  12655. input->end = &buffer[xmlStrlen(buffer)];
  12656. inputPush(ctxt, input);
  12657. }
  12658. /**
  12659. * xmlSAXUserParseFile:
  12660. * @sax: a SAX handler
  12661. * @user_data: The user data returned on SAX callbacks
  12662. * @filename: a file name
  12663. *
  12664. * parse an XML file and call the given SAX handler routines.
  12665. * Automatic support for ZLIB/Compress compressed document is provided
  12666. *
  12667. * Returns 0 in case of success or a error number otherwise
  12668. */
  12669. int
  12670. xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
  12671. const char *filename) {
  12672. int ret = 0;
  12673. xmlParserCtxtPtr ctxt;
  12674. ctxt = xmlCreateFileParserCtxt(filename);
  12675. if (ctxt == NULL) return -1;
  12676. if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
  12677. xmlFree(ctxt->sax);
  12678. ctxt->sax = sax;
  12679. xmlDetectSAX2(ctxt);
  12680. if (user_data != NULL)
  12681. ctxt->userData = user_data;
  12682. xmlParseDocument(ctxt);
  12683. if (ctxt->wellFormed)
  12684. ret = 0;
  12685. else {
  12686. if (ctxt->errNo != 0)
  12687. ret = ctxt->errNo;
  12688. else
  12689. ret = -1;
  12690. }
  12691. if (sax != NULL)
  12692. ctxt->sax = NULL;
  12693. if (ctxt->myDoc != NULL) {
  12694. xmlFreeDoc(ctxt->myDoc);
  12695. ctxt->myDoc = NULL;
  12696. }
  12697. xmlFreeParserCtxt(ctxt);
  12698. return ret;
  12699. }
  12700. #endif /* LIBXML_SAX1_ENABLED */
  12701. /************************************************************************
  12702. * *
  12703. * Front ends when parsing from memory *
  12704. * *
  12705. ************************************************************************/
  12706. /**
  12707. * xmlCreateMemoryParserCtxt:
  12708. * @buffer: a pointer to a char array
  12709. * @size: the size of the array
  12710. *
  12711. * Create a parser context for an XML in-memory document.
  12712. *
  12713. * Returns the new parser context or NULL
  12714. */
  12715. xmlParserCtxtPtr
  12716. xmlCreateMemoryParserCtxt(const char *buffer, int size) {
  12717. xmlParserCtxtPtr ctxt;
  12718. xmlParserInputPtr input;
  12719. xmlParserInputBufferPtr buf;
  12720. if (buffer == NULL)
  12721. return(NULL);
  12722. if (size <= 0)
  12723. return(NULL);
  12724. ctxt = xmlNewParserCtxt();
  12725. if (ctxt == NULL)
  12726. return(NULL);
  12727. /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
  12728. buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
  12729. if (buf == NULL) {
  12730. xmlFreeParserCtxt(ctxt);
  12731. return(NULL);
  12732. }
  12733. input = xmlNewInputStream(ctxt);
  12734. if (input == NULL) {
  12735. xmlFreeParserInputBuffer(buf);
  12736. xmlFreeParserCtxt(ctxt);
  12737. return(NULL);
  12738. }
  12739. input->filename = NULL;
  12740. input->buf = buf;
  12741. input->base = input->buf->buffer->content;
  12742. input->cur = input->buf->buffer->content;
  12743. input->end = &input->buf->buffer->content[input->buf->buffer->use];
  12744. inputPush(ctxt, input);
  12745. return(ctxt);
  12746. }
  12747. #ifdef LIBXML_SAX1_ENABLED
  12748. /**
  12749. * xmlSAXParseMemoryWithData:
  12750. * @sax: the SAX handler block
  12751. * @buffer: an pointer to a char array
  12752. * @size: the size of the array
  12753. * @recovery: work in recovery mode, i.e. tries to read no Well Formed
  12754. * documents
  12755. * @data: the userdata
  12756. *
  12757. * parse an XML in-memory block and use the given SAX function block
  12758. * to handle the parsing callback. If sax is NULL, fallback to the default
  12759. * DOM tree building routines.
  12760. *
  12761. * User data (void *) is stored within the parser context in the
  12762. * context's _private member, so it is available nearly everywhere in libxml
  12763. *
  12764. * Returns the resulting document tree
  12765. */
  12766. xmlDocPtr
  12767. xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
  12768. int size, int recovery, void *data) {
  12769. xmlDocPtr ret;
  12770. xmlParserCtxtPtr ctxt;
  12771. xmlInitParser();
  12772. ctxt = xmlCreateMemoryParserCtxt(buffer, size);
  12773. if (ctxt == NULL) return(NULL);
  12774. if (sax != NULL) {
  12775. if (ctxt->sax != NULL)
  12776. xmlFree(ctxt->sax);
  12777. ctxt->sax = sax;
  12778. }
  12779. xmlDetectSAX2(ctxt);
  12780. if (data!=NULL) {
  12781. ctxt->_private=data;
  12782. }
  12783. ctxt->recovery = recovery;
  12784. xmlParseDocument(ctxt);
  12785. if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
  12786. else {
  12787. ret = NULL;
  12788. xmlFreeDoc(ctxt->myDoc);
  12789. ctxt->myDoc = NULL;
  12790. }
  12791. if (sax != NULL)
  12792. ctxt->sax = NULL;
  12793. xmlFreeParserCtxt(ctxt);
  12794. return(ret);
  12795. }
  12796. /**
  12797. * xmlSAXParseMemory:
  12798. * @sax: the SAX handler block
  12799. * @buffer: an pointer to a char array
  12800. * @size: the size of the array
  12801. * @recovery: work in recovery mode, i.e. tries to read not Well Formed
  12802. * documents
  12803. *
  12804. * parse an XML in-memory block and use the given SAX function block
  12805. * to handle the parsing callback. If sax is NULL, fallback to the default
  12806. * DOM tree building routines.
  12807. *
  12808. * Returns the resulting document tree
  12809. */
  12810. xmlDocPtr
  12811. xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
  12812. int size, int recovery) {
  12813. return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
  12814. }
  12815. /**
  12816. * xmlParseMemory:
  12817. * @buffer: an pointer to a char array
  12818. * @size: the size of the array
  12819. *
  12820. * parse an XML in-memory block and build a tree.
  12821. *
  12822. * Returns the resulting document tree
  12823. */
  12824. xmlDocPtr xmlParseMemory(const char *buffer, int size) {
  12825. return(xmlSAXParseMemory(NULL, buffer, size, 0));
  12826. }
  12827. /**
  12828. * xmlRecoverMemory:
  12829. * @buffer: an pointer to a char array
  12830. * @size: the size of the array
  12831. *
  12832. * parse an XML in-memory block and build a tree.
  12833. * In the case the document is not Well Formed, an attempt to
  12834. * build a tree is tried anyway
  12835. *
  12836. * Returns the resulting document tree or NULL in case of error
  12837. */
  12838. xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
  12839. return(xmlSAXParseMemory(NULL, buffer, size, 1));
  12840. }
  12841. /**
  12842. * xmlSAXUserParseMemory:
  12843. * @sax: a SAX handler
  12844. * @user_data: The user data returned on SAX callbacks
  12845. * @buffer: an in-memory XML document input
  12846. * @size: the length of the XML document in bytes
  12847. *
  12848. * A better SAX parsing routine.
  12849. * parse an XML in-memory buffer and call the given SAX handler routines.
  12850. *
  12851. * Returns 0 in case of success or a error number otherwise
  12852. */
  12853. int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
  12854. const char *buffer, int size) {
  12855. int ret = 0;
  12856. xmlParserCtxtPtr ctxt;
  12857. xmlInitParser();
  12858. ctxt = xmlCreateMemoryParserCtxt(buffer, size);
  12859. if (ctxt == NULL) return -1;
  12860. if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
  12861. xmlFree(ctxt->sax);
  12862. ctxt->sax = sax;
  12863. xmlDetectSAX2(ctxt);
  12864. if (user_data != NULL)
  12865. ctxt->userData = user_data;
  12866. xmlParseDocument(ctxt);
  12867. if (ctxt->wellFormed)
  12868. ret = 0;
  12869. else {
  12870. if (ctxt->errNo != 0)
  12871. ret = ctxt->errNo;
  12872. else
  12873. ret = -1;
  12874. }
  12875. if (sax != NULL)
  12876. ctxt->sax = NULL;
  12877. if (ctxt->myDoc != NULL) {
  12878. xmlFreeDoc(ctxt->myDoc);
  12879. ctxt->myDoc = NULL;
  12880. }
  12881. xmlFreeParserCtxt(ctxt);
  12882. return ret;
  12883. }
  12884. #endif /* LIBXML_SAX1_ENABLED */
  12885. /**
  12886. * xmlCreateDocParserCtxt:
  12887. * @cur: a pointer to an array of xmlChar
  12888. *
  12889. * Creates a parser context for an XML in-memory document.
  12890. *
  12891. * Returns the new parser context or NULL
  12892. */
  12893. xmlParserCtxtPtr
  12894. xmlCreateDocParserCtxt(const xmlChar *cur) {
  12895. int len;
  12896. if (cur == NULL)
  12897. return(NULL);
  12898. len = xmlStrlen(cur);
  12899. return(xmlCreateMemoryParserCtxt((const char *)cur, len));
  12900. }
  12901. #ifdef LIBXML_SAX1_ENABLED
  12902. /**
  12903. * xmlSAXParseDoc:
  12904. * @sax: the SAX handler block
  12905. * @cur: a pointer to an array of xmlChar
  12906. * @recovery: work in recovery mode, i.e. tries to read no Well Formed
  12907. * documents
  12908. *
  12909. * parse an XML in-memory document and build a tree.
  12910. * It use the given SAX function block to handle the parsing callback.
  12911. * If sax is NULL, fallback to the default DOM tree building routines.
  12912. *
  12913. * Returns the resulting document tree
  12914. */
  12915. xmlDocPtr
  12916. xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
  12917. xmlDocPtr ret;
  12918. xmlParserCtxtPtr ctxt;
  12919. xmlSAXHandlerPtr oldsax = NULL;
  12920. if (cur == NULL) return(NULL);
  12921. ctxt = xmlCreateDocParserCtxt(cur);
  12922. if (ctxt == NULL) return(NULL);
  12923. if (sax != NULL) {
  12924. oldsax = ctxt->sax;
  12925. ctxt->sax = sax;
  12926. ctxt->userData = NULL;
  12927. }
  12928. xmlDetectSAX2(ctxt);
  12929. xmlParseDocument(ctxt);
  12930. if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
  12931. else {
  12932. ret = NULL;
  12933. xmlFreeDoc(ctxt->myDoc);
  12934. ctxt->myDoc = NULL;
  12935. }
  12936. if (sax != NULL)
  12937. ctxt->sax = oldsax;
  12938. xmlFreeParserCtxt(ctxt);
  12939. return(ret);
  12940. }
  12941. /**
  12942. * xmlParseDoc:
  12943. * @cur: a pointer to an array of xmlChar
  12944. *
  12945. * parse an XML in-memory document and build a tree.
  12946. *
  12947. * Returns the resulting document tree
  12948. */
  12949. xmlDocPtr
  12950. xmlParseDoc(const xmlChar *cur) {
  12951. return(xmlSAXParseDoc(NULL, cur, 0));
  12952. }
  12953. #endif /* LIBXML_SAX1_ENABLED */
  12954. #ifdef LIBXML_LEGACY_ENABLED
  12955. /************************************************************************
  12956. * *
  12957. * Specific function to keep track of entities references *
  12958. * and used by the XSLT debugger *
  12959. * *
  12960. ************************************************************************/
  12961. static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
  12962. /**
  12963. * xmlAddEntityReference:
  12964. * @ent : A valid entity
  12965. * @firstNode : A valid first node for children of entity
  12966. * @lastNode : A valid last node of children entity
  12967. *
  12968. * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
  12969. */
  12970. static void
  12971. xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
  12972. xmlNodePtr lastNode)
  12973. {
  12974. if (xmlEntityRefFunc != NULL) {
  12975. (*xmlEntityRefFunc) (ent, firstNode, lastNode);
  12976. }
  12977. }
  12978. /**
  12979. * xmlSetEntityReferenceFunc:
  12980. * @func: A valid function
  12981. *
  12982. * Set the function to call call back when a xml reference has been made
  12983. */
  12984. void
  12985. xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
  12986. {
  12987. xmlEntityRefFunc = func;
  12988. }
  12989. #endif /* LIBXML_LEGACY_ENABLED */
  12990. /************************************************************************
  12991. * *
  12992. * Miscellaneous *
  12993. * *
  12994. ************************************************************************/
  12995. #ifdef LIBXML_XPATH_ENABLED
  12996. #include <libxml/xpath.h>
  12997. #endif
  12998. extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
  12999. static int xmlParserInitialized = 0;
  13000. /**
  13001. * xmlInitParser:
  13002. *
  13003. * Initialization function for the XML parser.
  13004. * This is not reentrant. Call once before processing in case of
  13005. * use in multithreaded programs.
  13006. */
  13007. void
  13008. xmlInitParser(void) {
  13009. if (xmlParserInitialized != 0)
  13010. return;
  13011. #ifdef LIBXML_THREAD_ENABLED
  13012. __xmlGlobalInitMutexLock();
  13013. if (xmlParserInitialized == 0) {
  13014. #endif
  13015. xmlInitGlobals();
  13016. xmlInitThreads();
  13017. if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
  13018. (xmlGenericError == NULL))
  13019. initGenericErrorDefaultFunc(NULL);
  13020. xmlInitMemory();
  13021. xmlInitCharEncodingHandlers();
  13022. xmlDefaultSAXHandlerInit();
  13023. xmlRegisterDefaultInputCallbacks();
  13024. #ifdef LIBXML_OUTPUT_ENABLED
  13025. xmlRegisterDefaultOutputCallbacks();
  13026. #endif /* LIBXML_OUTPUT_ENABLED */
  13027. #ifdef LIBXML_HTML_ENABLED
  13028. htmlInitAutoClose();
  13029. htmlDefaultSAXHandlerInit();
  13030. #endif
  13031. #ifdef LIBXML_XPATH_ENABLED
  13032. xmlXPathInit();
  13033. #endif
  13034. xmlParserInitialized = 1;
  13035. #ifdef LIBXML_THREAD_ENABLED
  13036. }
  13037. __xmlGlobalInitMutexUnlock();
  13038. #endif
  13039. }
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */
void
xmlCleanupParser(void) {
    /* Nothing to release if xmlInitParser() has not completed. */
    if (!xmlParserInitialized)
	return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlCleanupGlobals();
    xmlResetLastError();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Clear the flag so a later xmlInitParser() runs the full sequence. */
    xmlParserInitialized = 0;
}
  13084. /************************************************************************
  13085. * *
  13086. * New set (2.6.0) of simpler and more flexible APIs *
  13087. * *
  13088. ************************************************************************/
  13089. /**
  13090. * DICT_FREE:
  13091. * @str: a string
  13092. *
  13093. * Free a string if it is not owned by the "dict" dictionnary in the
  13094. * current scope
  13095. */
  13096. #define DICT_FREE(str) \
  13097. if ((str) && ((!dict) || \
  13098. (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
  13099. xmlFree((char *)(str));
  13100. /**
  13101. * xmlCtxtReset:
  13102. * @ctxt: an XML parser context
  13103. *
  13104. * Reset a parser context
  13105. */
  13106. void
  13107. xmlCtxtReset(xmlParserCtxtPtr ctxt)
  13108. {
  13109. xmlParserInputPtr input;
  13110. xmlDictPtr dict;
  13111. if (ctxt == NULL)
  13112. return;
  13113. dict = ctxt->dict;
  13114. while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
  13115. xmlFreeInputStream(input);
  13116. }
  13117. ctxt->inputNr = 0;
  13118. ctxt->input = NULL;
  13119. ctxt->spaceNr = 0;
  13120. if (ctxt->spaceTab != NULL) {
  13121. ctxt->spaceTab[0] = -1;
  13122. ctxt->space = &ctxt->spaceTab[0];
  13123. } else {
  13124. ctxt->space = NULL;
  13125. }
  13126. ctxt->nodeNr = 0;
  13127. ctxt->node = NULL;
  13128. ctxt->nameNr = 0;
  13129. ctxt->name = NULL;
  13130. DICT_FREE(ctxt->version);
  13131. ctxt->version = NULL;
  13132. DICT_FREE(ctxt->encoding);
  13133. ctxt->encoding = NULL;
  13134. DICT_FREE(ctxt->directory);
  13135. ctxt->directory = NULL;
  13136. DICT_FREE(ctxt->extSubURI);
  13137. ctxt->extSubURI = NULL;
  13138. DICT_FREE(ctxt->extSubSystem);
  13139. ctxt->extSubSystem = NULL;
  13140. if (ctxt->myDoc != NULL)
  13141. xmlFreeDoc(ctxt->myDoc);
  13142. ctxt->myDoc = NULL;
  13143. ctxt->standalone = -1;
  13144. ctxt->hasExternalSubset = 0;
  13145. ctxt->hasPErefs = 0;
  13146. ctxt->html = 0;
  13147. ctxt->external = 0;
  13148. ctxt->instate = XML_PARSER_START;
  13149. ctxt->token = 0;
  13150. ctxt->wellFormed = 1;
  13151. ctxt->nsWellFormed = 1;
  13152. ctxt->disableSAX = 0;
  13153. ctxt->valid = 1;
  13154. #if 0
  13155. ctxt->vctxt.userData = ctxt;
  13156. ctxt->vctxt.error = xmlParserValidityError;
  13157. ctxt->vctxt.warning = xmlParserValidityWarning;
  13158. #endif
  13159. ctxt->record_info = 0;
  13160. ctxt->nbChars = 0;
  13161. ctxt->checkIndex = 0;
  13162. ctxt->inSubset = 0;
  13163. ctxt->errNo = XML_ERR_OK;
  13164. ctxt->depth = 0;
  13165. ctxt->charset = XML_CHAR_ENCODING_UTF8;
  13166. ctxt->catalogs = NULL;
  13167. ctxt->nbentities = 0;
  13168. ctxt->sizeentities = 0;
  13169. xmlInitNodeInfoSeq(&ctxt->node_seq);
  13170. if (ctxt->attsDefault != NULL) {
  13171. xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
  13172. ctxt->attsDefault = NULL;
  13173. }
  13174. if (ctxt->attsSpecial != NULL) {
  13175. xmlHashFree(ctxt->attsSpecial, NULL);
  13176. ctxt->attsSpecial = NULL;
  13177. }
  13178. #ifdef LIBXML_CATALOG_ENABLED
  13179. if (ctxt->catalogs != NULL)
  13180. xmlCatalogFreeLocal(ctxt->catalogs);
  13181. #endif
  13182. if (ctxt->lastError.code != XML_ERR_OK)
  13183. xmlResetError(&ctxt->lastError);
  13184. }
  13185. /**
  13186. * xmlCtxtResetPush:
  13187. * @ctxt: an XML parser context
  13188. * @chunk: a pointer to an array of chars
  13189. * @size: number of chars in the array
  13190. * @filename: an optional file name or URI
  13191. * @encoding: the document encoding, or NULL
  13192. *
  13193. * Reset a push parser context
  13194. *
  13195. * Returns 0 in case of success and 1 in case of error
  13196. */
  13197. int
  13198. xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
  13199. int size, const char *filename, const char *encoding)
  13200. {
  13201. xmlParserInputPtr inputStream;
  13202. xmlParserInputBufferPtr buf;
  13203. xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
  13204. if (ctxt == NULL)
  13205. return(1);
  13206. if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
  13207. enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
  13208. buf = xmlAllocParserInputBuffer(enc);
  13209. if (buf == NULL)
  13210. return(1);
  13211. if (ctxt == NULL) {
  13212. xmlFreeParserInputBuffer(buf);
  13213. return(1);
  13214. }
  13215. xmlCtxtReset(ctxt);
  13216. if (ctxt->pushTab == NULL) {
  13217. ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
  13218. sizeof(xmlChar *));
  13219. if (ctxt->pushTab == NULL) {
  13220. xmlErrMemory(ctxt, NULL);
  13221. xmlFreeParserInputBuffer(buf);
  13222. return(1);
  13223. }
  13224. }
  13225. if (filename == NULL) {
  13226. ctxt->directory = NULL;
  13227. } else {
  13228. ctxt->directory = xmlParserGetDirectory(filename);
  13229. }
  13230. inputStream = xmlNewInputStream(ctxt);
  13231. if (inputStream == NULL) {
  13232. xmlFreeParserInputBuffer(buf);
  13233. return(1);
  13234. }
  13235. if (filename == NULL)
  13236. inputStream->filename = NULL;
  13237. else
  13238. inputStream->filename = (char *)
  13239. xmlCanonicPath((const xmlChar *) filename);
  13240. inputStream->buf = buf;
  13241. inputStream->base = inputStream->buf->buffer->content;
  13242. inputStream->cur = inputStream->buf->buffer->content;
  13243. inputStream->end =
  13244. &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
  13245. inputPush(ctxt, inputStream);
  13246. if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
  13247. (ctxt->input->buf != NULL)) {
  13248. int base = ctxt->input->base - ctxt->input->buf->buffer->content;
  13249. int cur = ctxt->input->cur - ctxt->input->base;
  13250. xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
  13251. ctxt->input->base = ctxt->input->buf->buffer->content + base;
  13252. ctxt->input->cur = ctxt->input->base + cur;
  13253. ctxt->input->end =
  13254. &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
  13255. use];
  13256. #ifdef DEBUG_PUSH
  13257. xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
  13258. #endif
  13259. }
  13260. if (encoding != NULL) {
  13261. xmlCharEncodingHandlerPtr hdlr;
  13262. if (ctxt->encoding != NULL)
  13263. xmlFree((xmlChar *) ctxt->encoding);
  13264. ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
  13265. hdlr = xmlFindCharEncodingHandler(encoding);
  13266. if (hdlr != NULL) {
  13267. xmlSwitchToEncoding(ctxt, hdlr);
  13268. } else {
  13269. xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
  13270. "Unsupported encoding %s\n", BAD_CAST encoding);
  13271. }
  13272. } else if (enc != XML_CHAR_ENCODING_NONE) {
  13273. xmlSwitchEncoding(ctxt, enc);
  13274. }
  13275. return(0);
  13276. }
  13277. /**
  13278. * xmlCtxtUseOptionsInternal:
  13279. * @ctxt: an XML parser context
  13280. * @options: a combination of xmlParserOption
  13281. * @encoding: the user provided encoding to use
  13282. *
  13283. * Applies the options to the parser context
  13284. *
  13285. * Returns 0 in case of success, the set of unknown or unimplemented options
  13286. * in case of error.
  13287. */
  13288. static int
  13289. xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
  13290. {
  13291. if (ctxt == NULL)
  13292. return(-1);
  13293. if (encoding != NULL) {
  13294. if (ctxt->encoding != NULL)
  13295. xmlFree((xmlChar *) ctxt->encoding);
  13296. ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
  13297. }
  13298. if (options & XML_PARSE_RECOVER) {
  13299. ctxt->recovery = 1;
  13300. options -= XML_PARSE_RECOVER;
  13301. ctxt->options |= XML_PARSE_RECOVER;
  13302. } else
  13303. ctxt->recovery = 0;
  13304. if (options & XML_PARSE_DTDLOAD) {
  13305. ctxt->loadsubset = XML_DETECT_IDS;
  13306. options -= XML_PARSE_DTDLOAD;
  13307. ctxt->options |= XML_PARSE_DTDLOAD;
  13308. } else
  13309. ctxt->loadsubset = 0;
  13310. if (options & XML_PARSE_DTDATTR) {
  13311. ctxt->loadsubset |= XML_COMPLETE_ATTRS;
  13312. options -= XML_PARSE_DTDATTR;
  13313. ctxt->options |= XML_PARSE_DTDATTR;
  13314. }
  13315. if (options & XML_PARSE_NOENT) {
  13316. ctxt->replaceEntities = 1;
  13317. /* ctxt->loadsubset |= XML_DETECT_IDS; */
  13318. options -= XML_PARSE_NOENT;
  13319. ctxt->options |= XML_PARSE_NOENT;
  13320. } else
  13321. ctxt->replaceEntities = 0;
  13322. if (options & XML_PARSE_PEDANTIC) {
  13323. ctxt->pedantic = 1;
  13324. options -= XML_PARSE_PEDANTIC;
  13325. ctxt->options |= XML_PARSE_PEDANTIC;
  13326. } else
  13327. ctxt->pedantic = 0;
  13328. if (options & XML_PARSE_NOBLANKS) {
  13329. ctxt->keepBlanks = 0;
  13330. ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
  13331. options -= XML_PARSE_NOBLANKS;
  13332. ctxt->options |= XML_PARSE_NOBLANKS;
  13333. } else
  13334. ctxt->keepBlanks = 1;
  13335. if (options & XML_PARSE_DTDVALID) {
  13336. ctxt->validate = 1;
  13337. if (options & XML_PARSE_NOWARNING)
  13338. ctxt->vctxt.warning = NULL;
  13339. if (options & XML_PARSE_NOERROR)
  13340. ctxt->vctxt.error = NULL;
  13341. options -= XML_PARSE_DTDVALID;
  13342. ctxt->options |= XML_PARSE_DTDVALID;
  13343. } else
  13344. ctxt->validate = 0;
  13345. if (options & XML_PARSE_NOWARNING) {
  13346. ctxt->sax->warning = NULL;
  13347. options -= XML_PARSE_NOWARNING;
  13348. }
  13349. if (options & XML_PARSE_NOERROR) {
  13350. ctxt->sax->error = NULL;
  13351. ctxt->sax->fatalError = NULL;
  13352. options -= XML_PARSE_NOERROR;
  13353. }
  13354. #ifdef LIBXML_SAX1_ENABLED
  13355. if (options & XML_PARSE_SAX1) {
  13356. ctxt->sax->startElement = xmlSAX2StartElement;
  13357. ctxt->sax->endElement = xmlSAX2EndElement;
  13358. ctxt->sax->startElementNs = NULL;
  13359. ctxt->sax->endElementNs = NULL;
  13360. ctxt->sax->initialized = 1;
  13361. options -= XML_PARSE_SAX1;
  13362. ctxt->options |= XML_PARSE_SAX1;
  13363. }
  13364. #endif /* LIBXML_SAX1_ENABLED */
  13365. if (options & XML_PARSE_NODICT) {
  13366. ctxt->dictNames = 0;
  13367. options -= XML_PARSE_NODICT;
  13368. ctxt->options |= XML_PARSE_NODICT;
  13369. } else {
  13370. ctxt->dictNames = 1;
  13371. }
  13372. if (options & XML_PARSE_NOCDATA) {
  13373. ctxt->sax->cdataBlock = NULL;
  13374. options -= XML_PARSE_NOCDATA;
  13375. ctxt->options |= XML_PARSE_NOCDATA;
  13376. }
  13377. if (options & XML_PARSE_NSCLEAN) {
  13378. ctxt->options |= XML_PARSE_NSCLEAN;
  13379. options -= XML_PARSE_NSCLEAN;
  13380. }
  13381. if (options & XML_PARSE_NONET) {
  13382. ctxt->options |= XML_PARSE_NONET;
  13383. options -= XML_PARSE_NONET;
  13384. }
  13385. if (options & XML_PARSE_COMPACT) {
  13386. ctxt->options |= XML_PARSE_COMPACT;
  13387. options -= XML_PARSE_COMPACT;
  13388. }
  13389. if (options & XML_PARSE_OLD10) {
  13390. ctxt->options |= XML_PARSE_OLD10;
  13391. options -= XML_PARSE_OLD10;
  13392. }
  13393. if (options & XML_PARSE_NOBASEFIX) {
  13394. ctxt->options |= XML_PARSE_NOBASEFIX;
  13395. options -= XML_PARSE_NOBASEFIX;
  13396. }
  13397. if (options & XML_PARSE_HUGE) {
  13398. ctxt->options |= XML_PARSE_HUGE;
  13399. options -= XML_PARSE_HUGE;
  13400. }
  13401. if (options & XML_PARSE_OLDSAX) {
  13402. ctxt->options |= XML_PARSE_OLDSAX;
  13403. options -= XML_PARSE_OLDSAX;
  13404. }
  13405. ctxt->linenumbers = 1;
  13406. return (options);
  13407. }
  13408. /**
  13409. * xmlCtxtUseOptions:
  13410. * @ctxt: an XML parser context
  13411. * @options: a combination of xmlParserOption
  13412. *
  13413. * Applies the options to the parser context
  13414. *
  13415. * Returns 0 in case of success, the set of unknown or unimplemented options
  13416. * in case of error.
  13417. */
  13418. int
  13419. xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
  13420. {
  13421. return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
  13422. }
  13423. /**
  13424. * xmlDoRead:
  13425. * @ctxt: an XML parser context
  13426. * @URL: the base URL to use for the document
  13427. * @encoding: the document encoding, or NULL
  13428. * @options: a combination of xmlParserOption
  13429. * @reuse: keep the context for reuse
  13430. *
  13431. * Common front-end for the xmlRead functions
  13432. *
  13433. * Returns the resulting document tree or NULL
  13434. */
  13435. static xmlDocPtr
  13436. xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
  13437. int options, int reuse)
  13438. {
  13439. xmlDocPtr ret;
  13440. xmlCtxtUseOptionsInternal(ctxt, options, encoding);
  13441. if (encoding != NULL) {
  13442. xmlCharEncodingHandlerPtr hdlr;
  13443. hdlr = xmlFindCharEncodingHandler(encoding);
  13444. if (hdlr != NULL)
  13445. xmlSwitchToEncoding(ctxt, hdlr);
  13446. }
  13447. if ((URL != NULL) && (ctxt->input != NULL) &&
  13448. (ctxt->input->filename == NULL))
  13449. ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
  13450. xmlParseDocument(ctxt);
  13451. if ((ctxt->wellFormed) || ctxt->recovery)
  13452. ret = ctxt->myDoc;
  13453. else {
  13454. ret = NULL;
  13455. if (ctxt->myDoc != NULL) {
  13456. xmlFreeDoc(ctxt->myDoc);
  13457. }
  13458. }
  13459. ctxt->myDoc = NULL;
  13460. if (!reuse) {
  13461. xmlFreeParserCtxt(ctxt);
  13462. }
  13463. return (ret);
  13464. }
  13465. /**
  13466. * xmlReadDoc:
  13467. * @cur: a pointer to a zero terminated string
  13468. * @URL: the base URL to use for the document
  13469. * @encoding: the document encoding, or NULL
  13470. * @options: a combination of xmlParserOption
  13471. *
  13472. * parse an XML in-memory document and build a tree.
  13473. *
  13474. * Returns the resulting document tree
  13475. */
  13476. xmlDocPtr
  13477. xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
  13478. {
  13479. xmlParserCtxtPtr ctxt;
  13480. if (cur == NULL)
  13481. return (NULL);
  13482. ctxt = xmlCreateDocParserCtxt(cur);
  13483. if (ctxt == NULL)
  13484. return (NULL);
  13485. return (xmlDoRead(ctxt, URL, encoding, options, 0));
  13486. }
  13487. /**
  13488. * xmlReadFile:
  13489. * @filename: a file or URL
  13490. * @encoding: the document encoding, or NULL
  13491. * @options: a combination of xmlParserOption
  13492. *
  13493. * parse an XML file from the filesystem or the network.
  13494. *
  13495. * Returns the resulting document tree
  13496. */
  13497. xmlDocPtr
  13498. xmlReadFile(const char *filename, const char *encoding, int options)
  13499. {
  13500. xmlParserCtxtPtr ctxt;
  13501. ctxt = xmlCreateURLParserCtxt(filename, options);
  13502. if (ctxt == NULL)
  13503. return (NULL);
  13504. return (xmlDoRead(ctxt, NULL, encoding, options, 0));
  13505. }
  13506. /**
  13507. * xmlReadMemory:
  13508. * @buffer: a pointer to a char array
  13509. * @size: the size of the array
  13510. * @URL: the base URL to use for the document
  13511. * @encoding: the document encoding, or NULL
  13512. * @options: a combination of xmlParserOption
  13513. *
  13514. * parse an XML in-memory document and build a tree.
  13515. *
  13516. * Returns the resulting document tree
  13517. */
  13518. xmlDocPtr
  13519. xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
  13520. {
  13521. xmlParserCtxtPtr ctxt;
  13522. ctxt = xmlCreateMemoryParserCtxt(buffer, size);
  13523. if (ctxt == NULL)
  13524. return (NULL);
  13525. return (xmlDoRead(ctxt, URL, encoding, options, 0));
  13526. }
  13527. /**
  13528. * xmlReadFd:
  13529. * @fd: an open file descriptor
  13530. * @URL: the base URL to use for the document
  13531. * @encoding: the document encoding, or NULL
  13532. * @options: a combination of xmlParserOption
  13533. *
  13534. * parse an XML from a file descriptor and build a tree.
  13535. * NOTE that the file descriptor will not be closed when the
  13536. * reader is closed or reset.
  13537. *
  13538. * Returns the resulting document tree
  13539. */
  13540. xmlDocPtr
  13541. xmlReadFd(int fd, const char *URL, const char *encoding, int options)
  13542. {
  13543. xmlParserCtxtPtr ctxt;
  13544. xmlParserInputBufferPtr input;
  13545. xmlParserInputPtr stream;
  13546. if (fd < 0)
  13547. return (NULL);
  13548. input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
  13549. if (input == NULL)
  13550. return (NULL);
  13551. input->closecallback = NULL;
  13552. ctxt = xmlNewParserCtxt();
  13553. if (ctxt == NULL) {
  13554. xmlFreeParserInputBuffer(input);
  13555. return (NULL);
  13556. }
  13557. stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  13558. if (stream == NULL) {
  13559. xmlFreeParserInputBuffer(input);
  13560. xmlFreeParserCtxt(ctxt);
  13561. return (NULL);
  13562. }
  13563. inputPush(ctxt, stream);
  13564. return (xmlDoRead(ctxt, URL, encoding, options, 0));
  13565. }
  13566. /**
  13567. * xmlReadIO:
  13568. * @ioread: an I/O read function
  13569. * @ioclose: an I/O close function
  13570. * @ioctx: an I/O handler
  13571. * @URL: the base URL to use for the document
  13572. * @encoding: the document encoding, or NULL
  13573. * @options: a combination of xmlParserOption
  13574. *
  13575. * parse an XML document from I/O functions and source and build a tree.
  13576. *
  13577. * Returns the resulting document tree
  13578. */
  13579. xmlDocPtr
  13580. xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
  13581. void *ioctx, const char *URL, const char *encoding, int options)
  13582. {
  13583. xmlParserCtxtPtr ctxt;
  13584. xmlParserInputBufferPtr input;
  13585. xmlParserInputPtr stream;
  13586. if (ioread == NULL)
  13587. return (NULL);
  13588. input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
  13589. XML_CHAR_ENCODING_NONE);
  13590. if (input == NULL)
  13591. return (NULL);
  13592. ctxt = xmlNewParserCtxt();
  13593. if (ctxt == NULL) {
  13594. xmlFreeParserInputBuffer(input);
  13595. return (NULL);
  13596. }
  13597. stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  13598. if (stream == NULL) {
  13599. xmlFreeParserInputBuffer(input);
  13600. xmlFreeParserCtxt(ctxt);
  13601. return (NULL);
  13602. }
  13603. inputPush(ctxt, stream);
  13604. return (xmlDoRead(ctxt, URL, encoding, options, 0));
  13605. }
  13606. /**
  13607. * xmlCtxtReadDoc:
  13608. * @ctxt: an XML parser context
  13609. * @cur: a pointer to a zero terminated string
  13610. * @URL: the base URL to use for the document
  13611. * @encoding: the document encoding, or NULL
  13612. * @options: a combination of xmlParserOption
  13613. *
  13614. * parse an XML in-memory document and build a tree.
  13615. * This reuses the existing @ctxt parser context
  13616. *
  13617. * Returns the resulting document tree
  13618. */
  13619. xmlDocPtr
  13620. xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
  13621. const char *URL, const char *encoding, int options)
  13622. {
  13623. xmlParserInputPtr stream;
  13624. if (cur == NULL)
  13625. return (NULL);
  13626. if (ctxt == NULL)
  13627. return (NULL);
  13628. xmlCtxtReset(ctxt);
  13629. stream = xmlNewStringInputStream(ctxt, cur);
  13630. if (stream == NULL) {
  13631. return (NULL);
  13632. }
  13633. inputPush(ctxt, stream);
  13634. return (xmlDoRead(ctxt, URL, encoding, options, 1));
  13635. }
  13636. /**
  13637. * xmlCtxtReadFile:
  13638. * @ctxt: an XML parser context
  13639. * @filename: a file or URL
  13640. * @encoding: the document encoding, or NULL
  13641. * @options: a combination of xmlParserOption
  13642. *
  13643. * parse an XML file from the filesystem or the network.
  13644. * This reuses the existing @ctxt parser context
  13645. *
  13646. * Returns the resulting document tree
  13647. */
  13648. xmlDocPtr
  13649. xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
  13650. const char *encoding, int options)
  13651. {
  13652. xmlParserInputPtr stream;
  13653. if (filename == NULL)
  13654. return (NULL);
  13655. if (ctxt == NULL)
  13656. return (NULL);
  13657. xmlCtxtReset(ctxt);
  13658. stream = xmlLoadExternalEntity(filename, NULL, ctxt);
  13659. if (stream == NULL) {
  13660. return (NULL);
  13661. }
  13662. inputPush(ctxt, stream);
  13663. return (xmlDoRead(ctxt, NULL, encoding, options, 1));
  13664. }
  13665. /**
  13666. * xmlCtxtReadMemory:
  13667. * @ctxt: an XML parser context
  13668. * @buffer: a pointer to a char array
  13669. * @size: the size of the array
  13670. * @URL: the base URL to use for the document
  13671. * @encoding: the document encoding, or NULL
  13672. * @options: a combination of xmlParserOption
  13673. *
  13674. * parse an XML in-memory document and build a tree.
  13675. * This reuses the existing @ctxt parser context
  13676. *
  13677. * Returns the resulting document tree
  13678. */
  13679. xmlDocPtr
  13680. xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
  13681. const char *URL, const char *encoding, int options)
  13682. {
  13683. xmlParserInputBufferPtr input;
  13684. xmlParserInputPtr stream;
  13685. if (ctxt == NULL)
  13686. return (NULL);
  13687. if (buffer == NULL)
  13688. return (NULL);
  13689. xmlCtxtReset(ctxt);
  13690. input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
  13691. if (input == NULL) {
  13692. return(NULL);
  13693. }
  13694. stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  13695. if (stream == NULL) {
  13696. xmlFreeParserInputBuffer(input);
  13697. return(NULL);
  13698. }
  13699. inputPush(ctxt, stream);
  13700. return (xmlDoRead(ctxt, URL, encoding, options, 1));
  13701. }
  13702. /**
  13703. * xmlCtxtReadFd:
  13704. * @ctxt: an XML parser context
  13705. * @fd: an open file descriptor
  13706. * @URL: the base URL to use for the document
  13707. * @encoding: the document encoding, or NULL
  13708. * @options: a combination of xmlParserOption
  13709. *
  13710. * parse an XML from a file descriptor and build a tree.
  13711. * This reuses the existing @ctxt parser context
  13712. * NOTE that the file descriptor will not be closed when the
  13713. * reader is closed or reset.
  13714. *
  13715. * Returns the resulting document tree
  13716. */
  13717. xmlDocPtr
  13718. xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
  13719. const char *URL, const char *encoding, int options)
  13720. {
  13721. xmlParserInputBufferPtr input;
  13722. xmlParserInputPtr stream;
  13723. if (fd < 0)
  13724. return (NULL);
  13725. if (ctxt == NULL)
  13726. return (NULL);
  13727. xmlCtxtReset(ctxt);
  13728. input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
  13729. if (input == NULL)
  13730. return (NULL);
  13731. input->closecallback = NULL;
  13732. stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  13733. if (stream == NULL) {
  13734. xmlFreeParserInputBuffer(input);
  13735. return (NULL);
  13736. }
  13737. inputPush(ctxt, stream);
  13738. return (xmlDoRead(ctxt, URL, encoding, options, 1));
  13739. }
  13740. /**
  13741. * xmlCtxtReadIO:
  13742. * @ctxt: an XML parser context
  13743. * @ioread: an I/O read function
  13744. * @ioclose: an I/O close function
  13745. * @ioctx: an I/O handler
  13746. * @URL: the base URL to use for the document
  13747. * @encoding: the document encoding, or NULL
  13748. * @options: a combination of xmlParserOption
  13749. *
  13750. * parse an XML document from I/O functions and source and build a tree.
  13751. * This reuses the existing @ctxt parser context
  13752. *
  13753. * Returns the resulting document tree
  13754. */
  13755. xmlDocPtr
  13756. xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
  13757. xmlInputCloseCallback ioclose, void *ioctx,
  13758. const char *URL,
  13759. const char *encoding, int options)
  13760. {
  13761. xmlParserInputBufferPtr input;
  13762. xmlParserInputPtr stream;
  13763. if (ioread == NULL)
  13764. return (NULL);
  13765. if (ctxt == NULL)
  13766. return (NULL);
  13767. xmlCtxtReset(ctxt);
  13768. input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
  13769. XML_CHAR_ENCODING_NONE);
  13770. if (input == NULL)
  13771. return (NULL);
  13772. stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  13773. if (stream == NULL) {
  13774. xmlFreeParserInputBuffer(input);
  13775. return (NULL);
  13776. }
  13777. inputPush(ctxt, stream);
  13778. return (xmlDoRead(ctxt, URL, encoding, options, 1));
  13779. }
  13780. #define bottom_parser
  13781. #include "elfgcchack.h"