xmlscan.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533
  1. /*====================================================================*
  2. *
  3. * xmlscan.c - markup scanner;
  4. *
  5. * node.h
  6. *
  7. * scan XML source and create a parse tree;
  8. *
  9. * Motley Tools by Charles Maier;
  10. * Copyright (c) 2001-2006 by Charles Maier Associates;
  11. * Licensed under the Internet Software Consortium License;
  12. *
  13. *--------------------------------------------------------------------*/
  14. #ifndef XMLSCAN_SOURCE
  15. #define XMLSCAN_SOURCE
  16. /*====================================================================*
  17. * system header files;
  18. *--------------------------------------------------------------------*/
  19. #include <string.h>
  20. #include <ctype.h>
  21. /*====================================================================*
  22. * custom header files;
  23. *--------------------------------------------------------------------*/
  24. #include "../nodes/node.h"
  25. #include "../tools/number.h"
  26. #include "../tools/error.h"
  27. /*====================================================================*
  28. *
  29. * char * advance (char * string, unsigned * line);
  30. *
  31. * discard whitespace and count newlines up to the next meaningful
  32. * character;
  33. *
  34. * this function is critical to the XML parsing engine because it
  35. * ensures that node strings are NUL terminated and line counts
  36. * are accurate;
  37. *
  38. * Motley Tools by Charles Maier;
  39. * Copyright (c) 2001-2006 by Charles Maier Associates;
  40. * Licensed under the Internet Software Consortium License;
  41. *
  42. *--------------------------------------------------------------------*/
  43. static char * advance (char * string, unsigned * lineno)
  44. {
  45. while (isspace (*string))
  46. {
  47. if (*string == '\n')
  48. {
  49. (*lineno)++;
  50. }
  51. *string++ = (char)(0);
  52. }
  53. return (string);
  54. }
  55. /*====================================================================*
  56. *
  57. * char * discard (char * string, unsigned * line);
  58. *
  59. * discard current character; advance to next character;
  60. *
  61. * Motley Tools by Charles Maier;
  62. * Copyright (c) 2001-2006 by Charles Maier Associates;
  63. * Licensed under the Internet Software Consortium License;
  64. *
  65. *--------------------------------------------------------------------*/
  66. static char * discard (char * string, unsigned * lineno)
  67. {
  68. *string++ = (char)(0);
  69. string = advance (string, lineno);
  70. return (string);
  71. }
  72. /*====================================================================*
  73. *
  74. * char * nmtoken (char * string);
  75. *
  76. * collect nmtoken as per w3c xml 1.0 specification;
  77. *
  78. * Motley Tools by Charles Maier;
  79. * Copyright (c) 2001-2006 by Charles Maier Associates;
  80. * Licensed under the Internet Software Consortium License;
  81. *
  82. *--------------------------------------------------------------------*/
  83. static char * nmtoken (char * string)
  84. {
  85. while (isalnum (*string) || (*string == '-') || (*string == '_') || (*string == '.') || (*string == ':'))
  86. {
  87. string++;
  88. }
  89. return (string);
  90. }
  91. /*====================================================================*
  92. *
  93. * char * content (char * string, char quote, unsigned * line);
  94. *
  95. * collect literal string; discard quotes; preserve whitespace;
  96. * count newlines;
  97. *
  98. * Motley Tools by Charles Maier;
  99. * Copyright (c) 2001-2006 by Charles Maier Associates;
  100. * Licensed under the Internet Software Consortium License;
  101. *
  102. *--------------------------------------------------------------------*/
  103. static char * content (char * string, char quote, unsigned * lineno)
  104. {
  105. if (*string == quote)
  106. {
  107. *string++ = (char)(0);
  108. }
  109. while (*string)
  110. {
  111. if (*string == quote)
  112. {
  113. break;
  114. }
  115. if (*string++ == '\n')
  116. {
  117. (*lineno)++;
  118. }
  119. }
  120. if (*string == quote)
  121. {
  122. *string++ = (char)(0);
  123. }
  124. return (string);
  125. }
  126. /*====================================================================*
  127. *
  128. * char * collect (char * string);
  129. *
  130. * collect entity; an entity consists of non-blank characters
  131. * excluding common tag punctuation;
  132. *
  133. * Motley Tools by Charles Maier;
  134. * Copyright (c) 2001-2006 by Charles Maier Associates;
  135. * Licensed under the Internet Software Consortium License;
  136. *
  137. *--------------------------------------------------------------------*/
  138. static char * collect (char * string)
  139. {
  140. while (*string)
  141. {
  142. if (*string == '<')
  143. {
  144. break;
  145. }
  146. if (*string == '=')
  147. {
  148. break;
  149. }
  150. if (*string == '/')
  151. {
  152. break;
  153. }
  154. if (*string == '?')
  155. {
  156. break;
  157. }
  158. if (*string == '>')
  159. {
  160. break;
  161. }
  162. if (isspace (*string))
  163. {
  164. break;
  165. }
  166. string++;
  167. }
  168. return (string);
  169. }
  170. /*====================================================================*
  171. *
  172. * static char * comment (char * string, unsigned * line);
  173. *
  174. * collect comment;
  175. * preserve delimiters;
  176. * preserve whitespace;
  177. * count newlines;
  178. *
  179. * Motley Tools by Charles Maier;
  180. * Copyright (c) 2001-2006 by Charles Maier Associates;
  181. * Licensed under the Internet Software Consortium License;
  182. *
  183. *--------------------------------------------------------------------*/
  184. static char * comment (char * string, unsigned * lineno)
  185. {
  186. string++;
  187. if (*string == '-')
  188. {
  189. while (*string == '-')
  190. {
  191. string++;
  192. }
  193. while ((*string) && (*string != '-'))
  194. {
  195. while ((*string) && (*string != '-'))
  196. {
  197. if (*string == '\n')
  198. {
  199. (*lineno)++;
  200. }
  201. string++;
  202. }
  203. string++;
  204. }
  205. while (*string == '-')
  206. {
  207. string++;
  208. }
  209. }
  210. return (string);
  211. }
  212. /*====================================================================*
  213. *
  214. * char * literal (char * string, char quote, unsigned * line);
  215. *
  216. * collect literal;
  217. * preserve delimiters;
  218. * preserve whitespace;
  219. * count newlines;
  220. *
  221. * Motley Tools by Charles Maier;
  222. * Copyright (c) 2001-2006 by Charles Maier Associates;
  223. * Licensed under the Internet Software Consortium License;
  224. *
  225. *--------------------------------------------------------------------*/
  226. static char * literal (char *string, char quote, unsigned * lineno)
  227. {
  228. if (*string == quote)
  229. {
  230. *string++ = (char)(0);
  231. }
  232. while (*string)
  233. {
  234. if (*string == quote)
  235. {
  236. break;
  237. }
  238. if (*string == '\n')
  239. {
  240. (*lineno)++;
  241. }
  242. string++;
  243. }
  244. if (*string == quote)
  245. {
  246. *string++ = (char)(0);
  247. }
  248. return (string);
  249. }
  250. /*====================================================================*
  251. *
  252. * char * context (char * string, signed c, unsigned *line);
  253. *
  254. * collect context;
  255. * preserve delimiters;
  256. * preserve whitespace;
  257. * count newlines;
  258. *
  259. * Motley Tools by Charles Maier;
  260. * Copyright (c) 2001-2006 by Charles Maier Associates;
  261. * Licensed under the Internet Software Consortium License;
  262. *
  263. *--------------------------------------------------------------------*/
  264. static char * context (char *string, signed c, unsigned * lineno)
  265. {
  266. string++;
  267. while (*string)
  268. {
  269. if (*string == (char)(c))
  270. {
  271. string++;
  272. break;
  273. }
  274. if (*string == '{')
  275. {
  276. string = context (string, '}', lineno);
  277. continue;
  278. }
  279. if (*string == '(')
  280. {
  281. string = context (string, ')', lineno);
  282. continue;
  283. }
  284. if (*string == '[')
  285. {
  286. string = context (string, ']', lineno);
  287. continue;
  288. }
  289. if ((*string == '\"') || (*string == '\''))
  290. {
  291. string = literal (string, *string, lineno);
  292. continue;
  293. }
  294. if (*string == '\n')
  295. {
  296. (*lineno)++;
  297. }
  298. string++;
  299. }
  300. return (string);
  301. }
  302. /*====================================================================*
  303. *
  304. * void xmlscan (NODE * node);
  305. *
  306. * node.h
  307. *
  308. * Motley Tools by Charles Maier;
  309. * Copyright (c) 2001-2006 by Charles Maier Associates;
  310. * Licensed under the Internet Software Consortium License;
  311. *
  312. *--------------------------------------------------------------------*/
  313. signed xmlscan (NODE * node)
  314. {
  315. NODE * section = node;
  316. NODE * element;
  317. NODE * attribute;
  318. NODE * value;
  319. char prefix = (char)(0);
  320. char suffix = (char)(0);
  321. char * string = node->text;
  322. unsigned lineno = 1;
  323. if (!section)
  324. {
  325. error (1, EFAULT, "section is null");
  326. }
  327. if (!string)
  328. {
  329. error (1, EFAULT, "string is null");
  330. }
  331. while (*string)
  332. {
  333. if (*string == '<')
  334. {
  335. prefix = '<';
  336. suffix = '>';
  337. string = discard (string, &lineno);
  338. if ((*string == '/') || (*string == '?') || (*string == '!'))
  339. {
  340. prefix = *string;
  341. string = discard (string, &lineno);
  342. }
  343. element = xmlnode (section);
  344. element->line = lineno;
  345. element->type = NODE_ELEM;
  346. element->text = string;
  347. if (isalpha (*string))
  348. {
  349. string = nmtoken (string);
  350. }
  351. else if (*string == '-')
  352. {
  353. string = comment (string, &lineno);
  354. }
  355. else if (*string == '[')
  356. {
  357. string = context (string, ']', &lineno);
  358. }
  359. else
  360. {
  361. string = collect (string);
  362. }
  363. string = advance (string, &lineno);
  364. while ((*string) && (*string != '<') && (*string != '/') && (*string != '?') && (*string != '>'))
  365. {
  366. attribute = xmlnode (element);
  367. attribute->line = lineno;
  368. attribute->type = NODE_ATTR;
  369. attribute->text = string;
  370. if (isalpha (*string))
  371. {
  372. string = nmtoken (string);
  373. }
  374. else if (*string == '-')
  375. {
  376. string = comment (string, &lineno);
  377. }
  378. else if (*string == '[')
  379. {
  380. string = context (string, ']', &lineno);
  381. }
  382. else if ((*string == '\"') || (*string == '\''))
  383. {
  384. string = content (string, *string, &lineno);
  385. attribute->text++;
  386. }
  387. else
  388. {
  389. string = collect (string);
  390. }
  391. string = advance (string, &lineno);
  392. if (*string == '=')
  393. {
  394. string = discard (string, &lineno);
  395. value = xmlnode (attribute);
  396. value->line = lineno;
  397. value->type = NODE_VALU;
  398. value->text = string;
  399. if ((*string == '\"') || (*string == '\''))
  400. {
  401. string = content (string, *string, &lineno);
  402. value->text++;
  403. }
  404. else
  405. {
  406. string = collect (string);
  407. }
  408. string = advance (string, &lineno);
  409. }
  410. }
  411. if ((*string == '/') || (*string == '?'))
  412. {
  413. suffix = *string;
  414. string = discard (string, &lineno);
  415. }
  416. }
  417. else if (*string == '>')
  418. {
  419. string = discard (string, &lineno);
  420. if (prefix == '!')
  421. {
  422. element->type = NODE_SGML;
  423. }
  424. else if (prefix == '?')
  425. {
  426. element->type = NODE_INST;
  427. }
  428. else if (suffix == '?')
  429. {
  430. }
  431. else if (prefix == '/')
  432. {
  433. element->type = NODE_ETAG;
  434. if (element->below)
  435. {
  436. error (1, 0, "Element </%s> on line %d has attributes or content.", element->text, element->line);
  437. }
  438. if (strcmp (section->text, element->text))
  439. {
  440. error (1, 0, "Element <%s> on line %d teminated by </%s> on line %d", section->text, section->line, element->text, element->line);
  441. }
  442. if (section->above)
  443. {
  444. section = section->above;
  445. }
  446. }
  447. else if (suffix == '/')
  448. {
  449. }
  450. else
  451. {
  452. section = element;
  453. }
  454. }
  455. else
  456. {
  457. signed space = 0;
  458. char * output = string;
  459. NODE * segment = xmlnode (section);
  460. segment->line = lineno;
  461. segment->type = NODE_DATA;
  462. segment->text = string;
  463. while (*string)
  464. {
  465. if (*string == '<')
  466. {
  467. break;
  468. }
  469. if (isspace (*string))
  470. {
  471. string = advance (string, &lineno);
  472. space++;
  473. continue;
  474. }
  475. if (space)
  476. {
  477. *output++ = ' ';
  478. space--;
  479. }
  480. *output++ = *string++;
  481. }
  482. if (output < string)
  483. {
  484. *output = (char)(0);
  485. }
  486. }
  487. }
  488. return (0);
  489. }
  490. #endif