xmlscan.c.html 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553
  1. <?xml version='1.0' encoding='iso-8859-1'?>
  2. <!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>
  3. <html xmlns='http://www.w3.org/1999/xhtml' lang='en-us'>
  4. <head>
  5. <title>
  6. xmlscan.c
  7. </title>
  8. <meta http-equiv='content-type' content='text/html; charset=iso-8859-1'/>
  9. <meta name='generator' content='motley-tools 1.9.4 13:40:33 Feb 18 2015'/>
  10. <meta name='author' content='cmaier@cmassoc.net'/>
  11. <meta name='robots' content='noindex,nofollow'/>
  12. <link href='toolkit.css' rel='stylesheet' type='text/css'/>
  13. </head>
  14. <body>
  15. <div class='headerlink'>
  16. [<a href='xmlread.c.html' title=' xmlread.c '>PREV</a>]
  17. [<a href='toolkit.html' title=' Index '>HOME</a>]
  18. [<a href='xmlschema.c.html' title=' xmlschema.c '>NEXT</a>]
  19. </div>
  20. <pre>
  21. /*====================================================================*
  22. *
  23. * xmlscan.c - markup scanner;
  24. *
  25. * node.h
  26. *
  27. * scan XML source and create a parse tree;
  28. *
  29. * Motley Tools by Charles Maier &lt;cmaier@cmassoc.net&gt;;
  30. * Copyright (c) 2001-2006 by Charles Maier Associates;
  31. * Licensed under the Internet Software Consortium License;
  32. *
  33. *--------------------------------------------------------------------*/
  34. #ifndef XMLSCAN_SOURCE
  35. #define XMLSCAN_SOURCE
  36. /*====================================================================*
  37. * system header files;
  38. *--------------------------------------------------------------------*/
  39. #include &lt;string.h&gt;
  40. #include &lt;ctype.h&gt;
  41. /*====================================================================*
  42. * custom header files;
  43. *--------------------------------------------------------------------*/
  44. #include &quot;../nodes/node.h&quot;
  45. #include &quot;../tools/number.h&quot;
  46. #include &quot;../tools/error.h&quot;
  /*====================================================================*
   *
   *   char * advance (char * string, unsigned * lineno);
   *
   *   overwrite each leading whitespace character with NUL and return
   *   a pointer to the first character that is not whitespace; the
   *   counter at lineno is incremented once for every newline that is
   *   overwritten;
   *
   *   overwriting the whitespace terminates whatever text precedes
   *   it, which is how node strings become NUL terminated while the
   *   line count stays accurate;
   *
   *   each character is converted to unsigned char before it is
   *   classified because passing a negative char value to isspace()
   *   is undefined behavior;
   *
   *   Motley Tools by Charles Maier <cmaier@cmassoc.net>;
   *   Copyright (c) 2001-2006 by Charles Maier Associates;
   *   Licensed under the Internet Software Consortium License;
   *
   *--------------------------------------------------------------------*/
  static char * advance (char * string, unsigned * lineno)
  {
      while (isspace ((unsigned char)(*string)))
      {
          if (*string == '\n')
          {
              (*lineno)++;
          }
          *string++ = (char)(0);
      }
      return (string);
  }
  /*====================================================================*
   *
   *   char * discard (char * string, unsigned * lineno);
   *
   *   overwrite the current character with NUL then hand the rest of
   *   the string to advance () and return whatever advance () returns;
   *
   *   the caller must ensure that the current character is not the
   *   string terminator because this function always steps over it;
   *
   *   Motley Tools by Charles Maier <cmaier@cmassoc.net>;
   *   Copyright (c) 2001-2006 by Charles Maier Associates;
   *   Licensed under the Internet Software Consortium License;
   *
   *--------------------------------------------------------------------*/
  static char * discard (char * string, unsigned * lineno)
  {
      string [0] = '\0';
      return (advance (string + 1, lineno));
  }
  /*====================================================================*
   *
   *   char * nmtoken (char * string);
   *
   *   step over a run of name characters and return a pointer to the
   *   first character that is not one; name characters are letters,
   *   digits, '-', '_', '.' and ':'; the string is not modified;
   *
   *   each character is converted to unsigned char before it is
   *   classified because passing a negative char value to isalnum()
   *   is undefined behavior;
   *
   *   Motley Tools by Charles Maier <cmaier@cmassoc.net>;
   *   Copyright (c) 2001-2006 by Charles Maier Associates;
   *   Licensed under the Internet Software Consortium License;
   *
   *--------------------------------------------------------------------*/
  static char * nmtoken (char * string)
  {
      while (isalnum ((unsigned char)(*string)) || (*string == '-') || (*string == '_') || (*string == '.') || (*string == ':'))
      {
          string++;
      }
      return (string);
  }
  /*====================================================================*
   *
   *   char * content (char * string, char quote, unsigned * lineno);
   *
   *   scan a quoted string; when the first character is the quote
   *   character it is overwritten with NUL and skipped; characters
   *   are then passed over unchanged, with lineno incremented on each
   *   newline, until the next quote character or the end of string;
   *   a closing quote is overwritten with NUL and skipped;
   *
   *   return a pointer to the character after the closing quote or
   *   to the string terminator when there is no closing quote;
   *
   *   Motley Tools by Charles Maier <cmaier@cmassoc.net>;
   *   Copyright (c) 2001-2006 by Charles Maier Associates;
   *   Licensed under the Internet Software Consortium License;
   *
   *--------------------------------------------------------------------*/
  static char * content (char * string, char quote, unsigned * lineno)
  {
      char * cursor = string;
      if (*cursor == quote)
      {
          *cursor = '\0';
          cursor++;
      }
      for (; (*cursor != '\0') && (*cursor != quote); cursor++)
      {
          if (*cursor == '\n')
          {
              ++*lineno;
          }
      }
      if (*cursor == quote)
      {
          *cursor = '\0';
          cursor++;
      }
      return (cursor);
  }
  /*====================================================================*
   *
   *   char * collect (char * string);
   *
   *   step over a run of characters and return a pointer to the first
   *   one that is whitespace, the string terminator or one of the tag
   *   punctuation characters '<', '=', '/', '?' and '>'; the string is
   *   not modified;
   *
   *   each character is converted to unsigned char before it is
   *   classified because passing a negative char value to isspace()
   *   is undefined behavior;
   *
   *   Motley Tools by Charles Maier <cmaier@cmassoc.net>;
   *   Copyright (c) 2001-2006 by Charles Maier Associates;
   *   Licensed under the Internet Software Consortium License;
   *
   *--------------------------------------------------------------------*/
  static char * collect (char * string)
  {
      while (*string)
      {
          switch (*string)
          {
          case '<':
          case '=':
          case '/':
          case '?':
          case '>':
              return (string);
          default:
              break;
          }
          if (isspace ((unsigned char)(*string)))
          {
              break;
          }
          string++;
      }
      return (string);
  }
  /*====================================================================*
   *
   *   static char * comment (char * string, unsigned * lineno);
   *
   *   step over a comment; on entry string points at a '-'; when the
   *   next character is not also '-' the function returns a pointer
   *   to that next character; otherwise it skips the opening dashes,
   *   scans forward to the first pair of adjacent dashes, skips the
   *   closing dashes and returns a pointer to the character after
   *   them; lineno is incremented on each newline; the string is not
   *   modified;
   *
   *   when the comment is not closed the scan stops at the string
   *   terminator and a pointer to the terminator is returned; the
   *   scan never steps over the terminator;
   *
   *   Motley Tools by Charles Maier <cmaier@cmassoc.net>;
   *   Copyright (c) 2001-2006 by Charles Maier Associates;
   *   Licensed under the Internet Software Consortium License;
   *
   *--------------------------------------------------------------------*/
  static char * comment (char * string, unsigned * lineno)
  {
      string++;
      if (*string == '-')
      {
          while (*string == '-')
          {
              string++;
          }
          while ((*string) && (*string != '-'))
          {
              while ((*string) && (*string != '-'))
              {
                  if (*string == '\n')
                  {
                      (*lineno)++;
                  }
                  string++;
              }

              /*
               *   step over the dash that ended the inner scan; a following
               *   dash ends the outer scan; do not step when the inner scan
               *   ended on the terminator or the scan would leave the buffer;
               */

              if (*string)
              {
                  string++;
              }
          }
          while (*string == '-')
          {
              string++;
          }
      }
      return (string);
  }
  /*====================================================================*
   *
   *   char * literal (char * string, char quote, unsigned * lineno);
   *
   *   step over a quoted string that is embedded in a larger span of
   *   text; when the first character is the quote character it is
   *   skipped; characters are then passed over, with lineno
   *   incremented on each newline, until the next quote character or
   *   the end of string; a closing quote is skipped;
   *
   *   the string is not modified; both quote characters are left in
   *   place so that the enclosing text stays intact and is not cut
   *   short by an embedded NUL;
   *
   *   return a pointer to the character after the closing quote or
   *   to the string terminator when there is no closing quote;
   *
   *   Motley Tools by Charles Maier <cmaier@cmassoc.net>;
   *   Copyright (c) 2001-2006 by Charles Maier Associates;
   *   Licensed under the Internet Software Consortium License;
   *
   *--------------------------------------------------------------------*/
  static char * literal (char * string, char quote, unsigned * lineno)
  {
      if (*string == quote)
      {
          string++;
      }
      while ((*string) && (*string != quote))
      {
          if (*string == '\n')
          {
              (*lineno)++;
          }
          string++;
      }
      if (*string == quote)
      {
          string++;
      }
      return (string);
  }
  /*====================================================================*
   *
   *   char * context (char * string, signed c, unsigned * lineno);
   *
   *   step over a bracketed span; on entry string points at the
   *   opening bracket, which is skipped without being examined; the
   *   scan ends after the first character equal to c that is found
   *   at this nesting level or at the string terminator;
   *
   *   an embedded '{', '(' or '[' is scanned recursively up to its
   *   matching '}', ')' or ']'; an embedded single or double quote is
   *   handed to literal () so that brackets inside quoted text are
   *   not treated as nesting; lineno is incremented on each newline
   *   seen at this level;
   *
   *   return a pointer to the character after the closing character
   *   or to the string terminator when the span is not closed;
   *
   *   Motley Tools by Charles Maier <cmaier@cmassoc.net>;
   *   Copyright (c) 2001-2006 by Charles Maier Associates;
   *   Licensed under the Internet Software Consortium License;
   *
   *--------------------------------------------------------------------*/
  static char * context (char * string, signed c, unsigned * lineno)
  {
      char const closer = (char)(c);
      char * cursor = string + 1;
      while (*cursor != '\0')
      {
          if (*cursor == closer)
          {
              return (cursor + 1);
          }
          switch (*cursor)
          {
          case '{':
              cursor = context (cursor, '}', lineno);
              break;
          case '(':
              cursor = context (cursor, ')', lineno);
              break;
          case '[':
              cursor = context (cursor, ']', lineno);
              break;
          case '"':
          case '\'':
              cursor = literal (cursor, *cursor, lineno);
              break;
          case '\n':
              (*lineno)++;
              cursor++;
              break;
          default:
              cursor++;
              break;
          }
      }
      return (cursor);
  }
  322. /*====================================================================*
  323. *
  324. * void xmlscan (NODE * node);
  325. *
  326. * node.h
  327. *
  328. * Motley Tools by Charles Maier &lt;cmaier@cmassoc.net&gt;;
  329. * Copyright (c) 2001-2006 by Charles Maier Associates;
  330. * Licensed under the Internet Software Consortium License;
  331. *
  332. *--------------------------------------------------------------------*/
  333. signed xmlscan (NODE * node)
  334. {
  335. NODE * section = node;
  336. NODE * element;
  337. NODE * attribute;
  338. NODE * value;
  339. char prefix = (char)(0);
  340. char suffix = (char)(0);
  341. char * string = node-&gt;text;
  342. unsigned lineno = 1;
  343. if (!section)
  344. {
  345. error (1, EFAULT, &quot;section is null&quot;);
  346. }
  347. if (!string)
  348. {
  349. error (1, EFAULT, &quot;string is null&quot;);
  350. }
  351. while (*string)
  352. {
  353. if (*string == '&lt;')
  354. {
  355. prefix = '&lt;';
  356. suffix = '&gt;';
  357. string = discard (string, &amp;lineno);
  358. if ((*string == '/') || (*string == '?') || (*string == '!'))
  359. {
  360. prefix = *string;
  361. string = discard (string, &amp;lineno);
  362. }
  363. element = xmlnode (section);
  364. element-&gt;line = lineno;
  365. element-&gt;type = NODE_ELEM;
  366. element-&gt;text = string;
  367. if (isalpha (*string))
  368. {
  369. string = nmtoken (string);
  370. }
  371. else if (*string == '-')
  372. {
  373. string = comment (string, &amp;lineno);
  374. }
  375. else if (*string == '[')
  376. {
  377. string = context (string, ']', &amp;lineno);
  378. }
  379. else
  380. {
  381. string = collect (string);
  382. }
  383. string = advance (string, &amp;lineno);
  384. while ((*string) &amp;&amp; (*string != '&lt;') &amp;&amp; (*string != '/') &amp;&amp; (*string != '?') &amp;&amp; (*string != '&gt;'))
  385. {
  386. attribute = xmlnode (element);
  387. attribute-&gt;line = lineno;
  388. attribute-&gt;type = NODE_ATTR;
  389. attribute-&gt;text = string;
  390. if (isalpha (*string))
  391. {
  392. string = nmtoken (string);
  393. }
  394. else if (*string == '-')
  395. {
  396. string = comment (string, &amp;lineno);
  397. }
  398. else if (*string == '[')
  399. {
  400. string = context (string, ']', &amp;lineno);
  401. }
  402. else if ((*string == '\&quot;') || (*string == '\''))
  403. {
  404. string = content (string, *string, &amp;lineno);
  405. attribute-&gt;text++;
  406. }
  407. else
  408. {
  409. string = collect (string);
  410. }
  411. string = advance (string, &amp;lineno);
  412. if (*string == '=')
  413. {
  414. string = discard (string, &amp;lineno);
  415. value = xmlnode (attribute);
  416. value-&gt;line = lineno;
  417. value-&gt;type = NODE_VALU;
  418. value-&gt;text = string;
  419. if ((*string == '\&quot;') || (*string == '\''))
  420. {
  421. string = content (string, *string, &amp;lineno);
  422. value-&gt;text++;
  423. }
  424. else
  425. {
  426. string = collect (string);
  427. }
  428. string = advance (string, &amp;lineno);
  429. }
  430. }
  431. if ((*string == '/') || (*string == '?'))
  432. {
  433. suffix = *string;
  434. string = discard (string, &amp;lineno);
  435. }
  436. }
  437. else if (*string == '&gt;')
  438. {
  439. string = discard (string, &amp;lineno);
  440. if (prefix == '!')
  441. {
  442. element-&gt;type = NODE_SGML;
  443. }
  444. else if (prefix == '?')
  445. {
  446. element-&gt;type = NODE_INST;
  447. }
  448. else if (suffix == '?')
  449. {
  450. }
  451. else if (prefix == '/')
  452. {
  453. element-&gt;type = NODE_ETAG;
  454. if (element-&gt;below)
  455. {
  456. error (1, 0, &quot;Element &lt;/%s&gt; on line %d has attributes or content.&quot;, element-&gt;text, element-&gt;line);
  457. }
  458. if (strcmp (section-&gt;text, element-&gt;text))
  459. {
  460. error (1, 0, &quot;Element &lt;%s&gt; on line %d teminated by &lt;/%s&gt; on line %d&quot;, section-&gt;text, section-&gt;line, element-&gt;text, element-&gt;line);
  461. }
  462. if (section-&gt;above)
  463. {
  464. section = section-&gt;above;
  465. }
  466. }
  467. else if (suffix == '/')
  468. {
  469. }
  470. else
  471. {
  472. section = element;
  473. }
  474. }
  475. else
  476. {
  477. signed space = 0;
  478. char * output = string;
  479. NODE * segment = xmlnode (section);
  480. segment-&gt;line = lineno;
  481. segment-&gt;type = NODE_DATA;
  482. segment-&gt;text = string;
  483. while (*string)
  484. {
  485. if (*string == '&lt;')
  486. {
  487. break;
  488. }
  489. if (isspace (*string))
  490. {
  491. string = advance (string, &amp;lineno);
  492. space++;
  493. continue;
  494. }
  495. if (space)
  496. {
  497. *output++ = ' ';
  498. space--;
  499. }
  500. *output++ = *string++;
  501. }
  502. if (output &lt; string)
  503. {
  504. *output = (char)(0);
  505. }
  506. }
  507. }
  508. return (0);
  509. }
  510. #endif
  511. </pre>
  512. <div class='footerlink'>
  513. [<a href='xmlread.c.html' title=' xmlread.c '>PREV</a>]
  514. [<a href='toolkit.html' title=' Index '>HOME</a>]
  515. [<a href='xmlschema.c.html' title=' xmlschema.c '>NEXT</a>]
  516. </div>
  517. </body>
  518. </html>