123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553 |
- <?xml version='1.0' encoding='iso-8859-1'?>
- <!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>
- <html xmlns='http://www.w3.org/1999/xhtml' lang='en-us'>
- <head>
- <title>
- xmlscan.c
- </title>
- <meta http-equiv='content-type' content='text/html; charset=iso-8859-1'/>
- <meta name='generator' content='motley-tools 1.9.4 13:40:33 Feb 18 2015'/>
- <meta name='author' content='cmaier@cmassoc.net'/>
- <meta name='robots' content='noindex,nofollow'/>
- <link href='toolkit.css' rel='stylesheet' type='text/css'/>
- </head>
- <body>
- <div class='headerlink'>
- [<a href='xmlread.c.html' title=' xmlread.c '>PREV</a>]
- [<a href='toolkit.html' title=' Index '>HOME</a>]
- [<a href='xmlschema.c.html' title=' xmlschema.c '>NEXT</a>]
- </div>
- <pre>
- /*====================================================================*
- *
- * xmlscan.c - markup scanner;
- *
- * node.h
- *
- * scan XML source and create a parse tree;
- *
- * Motley Tools by Charles Maier <cmaier@cmassoc.net>;
- * Copyright (c) 2001-2006 by Charles Maier Associates;
- * Licensed under the Internet Software Consortium License;
- *
- *--------------------------------------------------------------------*/
- #ifndef XMLSCAN_SOURCE
- #define XMLSCAN_SOURCE
- /*====================================================================*
- * system header files;
- *--------------------------------------------------------------------*/
- #include <string.h>
- #include <ctype.h>
- /*====================================================================*
- * custom header files;
- *--------------------------------------------------------------------*/
- #include "../nodes/node.h"
- #include "../tools/number.h"
- #include "../tools/error.h"
/*====================================================================*
 *
 *   char * advance (char * string, unsigned * lineno);
 *
 *   overwrite each leading whitespace character with NUL, count
 *   every newline that is overwritten in *lineno and return a
 *   pointer to the first non-whitespace character, which may be
 *   the terminating NUL;
 *
 *   this function is critical to the XML parsing engine because it
 *   ensures that node strings are NUL terminated and line counts
 *   are accurate;
 *
 *   each character is converted to unsigned char before it is
 *   classified because passing a negative value, other than EOF,
 *   to isspace() is undefined behavior;
 *
 *   Motley Tools by Charles Maier <cmaier@cmassoc.net>;
 *   Copyright (c) 2001-2006 by Charles Maier Associates;
 *   Licensed under the Internet Software Consortium License;
 *
 *--------------------------------------------------------------------*/

static char * advance (char * string, unsigned * lineno)
{
	while (isspace ((unsigned char)(*string)))
	{
		if (*string == '\n')
		{
			(*lineno)++;
		}

		/* erase the whitespace so the preceding token is terminated */

		*string++ = (char)(0);
	}
	return (string);
}
/*====================================================================*
 *
 *   char * discard (char * string, unsigned * lineno);
 *
 *   replace the current character with NUL then let advance() deal
 *   with whatever follows it; return the pointer that advance()
 *   returns;
 *
 *   Motley Tools by Charles Maier <cmaier@cmassoc.net>;
 *   Copyright (c) 2001-2006 by Charles Maier Associates;
 *   Licensed under the Internet Software Consortium License;
 *
 *--------------------------------------------------------------------*/

static char * discard (char * string, unsigned * lineno)
{
	string [0] = '\0';
	return (advance (string + 1, lineno));
}
/*====================================================================*
 *
 *   char * nmtoken (char * string);
 *
 *   collect nmtoken as per w3c xml 1.0 specification; skip over
 *   letters, digits, '-', '_', '.' and ':' and return a pointer to
 *   the first character that is none of those; the string is not
 *   modified;
 *
 *   each character is converted to unsigned char before it is
 *   classified because passing a negative value, other than EOF,
 *   to isalnum() is undefined behavior;
 *
 *   Motley Tools by Charles Maier <cmaier@cmassoc.net>;
 *   Copyright (c) 2001-2006 by Charles Maier Associates;
 *   Licensed under the Internet Software Consortium License;
 *
 *--------------------------------------------------------------------*/

static char * nmtoken (char * string)
{
	while (isalnum ((unsigned char)(*string)) || (*string == '-') || (*string == '_') || (*string == '.') || (*string == ':'))
	{
		string++;
	}
	return (string);
}
/*====================================================================*
 *
 *   char * content (char * string, char quote, unsigned * lineno);
 *
 *   collect a quoted string; the opening quote, when present, and
 *   the closing quote, when found, are each replaced with NUL so
 *   that the text between them becomes a terminated string; the
 *   text itself is left untouched and newlines inside it are
 *   counted in *lineno;
 *
 *   return a pointer to the character after the closing quote or
 *   to the terminating NUL when there is no closing quote;
 *
 *   Motley Tools by Charles Maier <cmaier@cmassoc.net>;
 *   Copyright (c) 2001-2006 by Charles Maier Associates;
 *   Licensed under the Internet Software Consortium License;
 *
 *--------------------------------------------------------------------*/

static char * content (char * string, char quote, unsigned * lineno)
{
	char * cursor = string;

	/* erase the opening quote */

	if (*cursor == quote)
	{
		*cursor = '\0';
		cursor++;
	}

	/* walk the quoted text */

	for (; (*cursor != '\0') && (*cursor != quote); cursor++)
	{
		if (*cursor == '\n')
		{
			*lineno += 1;
		}
	}

	/* erase the closing quote which also terminates the text */

	if (*cursor == quote)
	{
		*cursor = '\0';
		cursor++;
	}
	return (cursor);
}
/*====================================================================*
 *
 *   char * collect (char * string);
 *
 *   collect entity; an entity consists of non-blank characters
 *   excluding the tag punctuation '<', '=', '/', '?' and '>';
 *   return a pointer to the first delimiter, whitespace character
 *   or terminating NUL; the string is not modified;
 *
 *   each character is converted to unsigned char before it is
 *   classified because passing a negative value, other than EOF,
 *   to isspace() is undefined behavior;
 *
 *   Motley Tools by Charles Maier <cmaier@cmassoc.net>;
 *   Copyright (c) 2001-2006 by Charles Maier Associates;
 *   Licensed under the Internet Software Consortium License;
 *
 *--------------------------------------------------------------------*/

static char * collect (char * string)
{

	/* the loop guard keeps NUL away from strchr, which would match the terminator */

	while (*string)
	{
		if (strchr ("<=/?>", *string))
		{
			break;
		}
		if (isspace ((unsigned char)(*string)))
		{
			break;
		}
		string++;
	}
	return (string);
}
/*====================================================================*
 *
 *   static char * comment (char * string, unsigned * lineno);
 *
 *   collect comment;
 *   preserve delimiters;
 *   preserve whitespace;
 *   count newlines;
 *
 *   on entry string must point at a '-'; when the next character is
 *   not also '-' the function returns a pointer to that character;
 *   otherwise it skips the opening run of '-', scans forward to the
 *   first '-' that is immediately followed by another '-', skips
 *   that closing run of '-' and returns a pointer to whatever
 *   follows it; the string is never modified;
 *
 *   an unterminated comment stops at the terminating NUL; the scan
 *   never steps beyond the end of the string;
 *
 *   Motley Tools by Charles Maier <cmaier@cmassoc.net>;
 *   Copyright (c) 2001-2006 by Charles Maier Associates;
 *   Licensed under the Internet Software Consortium License;
 *
 *--------------------------------------------------------------------*/

static char * comment (char * string, unsigned * lineno)
{
	string++;
	if (*string == '-')
	{
		while (*string == '-')
		{
			string++;
		}

		/* each pass consumes text up to and including one '-'; the loop ends when the next character is also '-' */

		while ((*string) && (*string != '-'))
		{
			while ((*string) && (*string != '-'))
			{
				if (*string == '\n')
				{
					(*lineno)++;
				}
				string++;
			}

			/* step over the '-' but never over the terminating NUL */

			if (*string)
			{
				string++;
			}
		}
		while (*string == '-')
		{
			string++;
		}
	}
	return (string);
}
/*====================================================================*
 *
 *   char * literal (char * string, char quote, unsigned * lineno);
 *
 *   collect literal;
 *   preserve delimiters;
 *   preserve whitespace;
 *   count newlines;
 *
 *   step over the opening quote, when present, scan to the matching
 *   closing quote and step over that too; return a pointer to the
 *   character after the closing quote or to the terminating NUL
 *   when there is no closing quote;
 *
 *   unlike content(), which erases both quotes, this function does
 *   not modify the string; the enclosing bracketed context must
 *   remain one unbroken string, quotes included;
 *
 *   Motley Tools by Charles Maier <cmaier@cmassoc.net>;
 *   Copyright (c) 2001-2006 by Charles Maier Associates;
 *   Licensed under the Internet Software Consortium License;
 *
 *--------------------------------------------------------------------*/

static char * literal (char * string, char quote, unsigned * lineno)
{
	if (*string == quote)
	{
		string++;
	}
	while ((*string) && (*string != quote))
	{
		if (*string == '\n')
		{
			(*lineno)++;
		}
		string++;
	}
	if (*string == quote)
	{
		string++;
	}
	return (string);
}
/*====================================================================*
 *
 *   char * context (char * string, signed c, unsigned * lineno);
 *
 *   collect a bracketed context; the character at string is taken
 *   to be the opening bracket and is skipped; scanning continues to
 *   the first unnested occurrence of closing character c and the
 *   function returns a pointer to the character after it, or to the
 *   terminating NUL when c is never found;
 *
 *   nested '{', '(' and '[' groups are consumed recursively with
 *   their own closing characters; quoted runs are handed off to
 *   literal(); newlines met directly by this function are counted
 *   in *lineno; this function itself writes nothing to the string;
 *
 *   Motley Tools by Charles Maier <cmaier@cmassoc.net>;
 *   Copyright (c) 2001-2006 by Charles Maier Associates;
 *   Licensed under the Internet Software Consortium License;
 *
 *--------------------------------------------------------------------*/

static char * context (char * string, signed c, unsigned * lineno)
{
	char const closer = (char)(c);
	for (string++; *string != '\0'; )
	{

		/* the closing character is tested first so it wins over every case below */

		if (*string == closer)
		{
			return (string + 1);
		}
		switch (*string)
		{
		case '{':
			string = context (string, '}', lineno);
			break;
		case '(':
			string = context (string, ')', lineno);
			break;
		case '[':
			string = context (string, ']', lineno);
			break;
		case '\"':
		case '\'':
			string = literal (string, *string, lineno);
			break;
		case '\n':
			(*lineno)++;
			string++;
			break;
		default:
			string++;
			break;
		}
	}
	return (string);
}
- /*====================================================================*
- *
- * void xmlscan (NODE * node);
- *
- * node.h
- *
- * Motley Tools by Charles Maier <cmaier@cmassoc.net>;
- * Copyright (c) 2001-2006 by Charles Maier Associates;
- * Licensed under the Internet Software Consortium License;
- *
- *--------------------------------------------------------------------*/
- signed xmlscan (NODE * node)
- {
- NODE * section = node;
- NODE * element;
- NODE * attribute;
- NODE * value;
- char prefix = (char)(0);
- char suffix = (char)(0);
- char * string = node->text;
- unsigned lineno = 1;
- if (!section)
- {
- error (1, EFAULT, "section is null");
- }
- if (!string)
- {
- error (1, EFAULT, "string is null");
- }
- while (*string)
- {
- if (*string == '<')
- {
- prefix = '<';
- suffix = '>';
- string = discard (string, &lineno);
- if ((*string == '/') || (*string == '?') || (*string == '!'))
- {
- prefix = *string;
- string = discard (string, &lineno);
- }
- element = xmlnode (section);
- element->line = lineno;
- element->type = NODE_ELEM;
- element->text = string;
- if (isalpha (*string))
- {
- string = nmtoken (string);
- }
- else if (*string == '-')
- {
- string = comment (string, &lineno);
- }
- else if (*string == '[')
- {
- string = context (string, ']', &lineno);
- }
- else
- {
- string = collect (string);
- }
- string = advance (string, &lineno);
- while ((*string) && (*string != '<') && (*string != '/') && (*string != '?') && (*string != '>'))
- {
- attribute = xmlnode (element);
- attribute->line = lineno;
- attribute->type = NODE_ATTR;
- attribute->text = string;
- if (isalpha (*string))
- {
- string = nmtoken (string);
- }
- else if (*string == '-')
- {
- string = comment (string, &lineno);
- }
- else if (*string == '[')
- {
- string = context (string, ']', &lineno);
- }
- else if ((*string == '\"') || (*string == '\''))
- {
- string = content (string, *string, &lineno);
- attribute->text++;
- }
- else
- {
- string = collect (string);
- }
- string = advance (string, &lineno);
- if (*string == '=')
- {
- string = discard (string, &lineno);
- value = xmlnode (attribute);
- value->line = lineno;
- value->type = NODE_VALU;
- value->text = string;
- if ((*string == '\"') || (*string == '\''))
- {
- string = content (string, *string, &lineno);
- value->text++;
- }
- else
- {
- string = collect (string);
- }
- string = advance (string, &lineno);
- }
- }
- if ((*string == '/') || (*string == '?'))
- {
- suffix = *string;
- string = discard (string, &lineno);
- }
- }
- else if (*string == '>')
- {
- string = discard (string, &lineno);
- if (prefix == '!')
- {
- element->type = NODE_SGML;
- }
- else if (prefix == '?')
- {
- element->type = NODE_INST;
- }
- else if (suffix == '?')
- {
- }
- else if (prefix == '/')
- {
- element->type = NODE_ETAG;
- if (element->below)
- {
- error (1, 0, "Element </%s> on line %d has attributes or content.", element->text, element->line);
- }
- if (strcmp (section->text, element->text))
- {
- error (1, 0, "Element <%s> on line %d teminated by </%s> on line %d", section->text, section->line, element->text, element->line);
- }
- if (section->above)
- {
- section = section->above;
- }
- }
- else if (suffix == '/')
- {
- }
- else
- {
- section = element;
- }
- }
- else
- {
- signed space = 0;
- char * output = string;
- NODE * segment = xmlnode (section);
- segment->line = lineno;
- segment->type = NODE_DATA;
- segment->text = string;
- while (*string)
- {
- if (*string == '<')
- {
- break;
- }
- if (isspace (*string))
- {
- string = advance (string, &lineno);
- space++;
- continue;
- }
- if (space)
- {
- *output++ = ' ';
- space--;
- }
- *output++ = *string++;
- }
- if (output < string)
- {
- *output = (char)(0);
- }
- }
- }
- return (0);
- }
- #endif
- </pre>
- <div class='footerlink'>
- [<a href='xmlread.c.html' title=' xmlread.c '>PREV</a>]
- [<a href='toolkit.html' title=' Index '>HOME</a>]
- [<a href='xmlschema.c.html' title=' xmlschema.c '>NEXT</a>]
- </div>
- </body>
- </html>
|