/*====================================================================* * * xmlscan.c - markup scanner; * * node.h * * scan XML source and create a parse tree; * * Motley Tools by Charles Maier ; * Copyright (c) 2001-2006 by Charles Maier Associates; * Licensed under the Internet Software Consortium License; * *--------------------------------------------------------------------*/ #ifndef XMLSCAN_SOURCE #define XMLSCAN_SOURCE /*====================================================================* * system header files; *--------------------------------------------------------------------*/ #include #include /*====================================================================* * custom header files; *--------------------------------------------------------------------*/ #include "../nodes/node.h" #include "../tools/number.h" #include "../tools/error.h" /*====================================================================* * * char * advance (char * string, unsigned * line); * * discard whitespace and count newlines up to the next meaningful * character; * * this function is critical to the XML parsing engine because it * ensures that node strings are NUL terminated and line counts * are accurate; * * Motley Tools by Charles Maier ; * Copyright (c) 2001-2006 by Charles Maier Associates; * Licensed under the Internet Software Consortium License; * *--------------------------------------------------------------------*/ static char * advance (char * string, unsigned * lineno) { while (isspace ((unsigned char)* string)) { if (* string == '\n') { (* lineno)++; } * string++ = (char) (0); } return (string); } /*====================================================================* * * char * discard (char * string, unsigned * line); * * discard current character; advance to next character; * * Motley Tools by Charles Maier ; * Copyright (c) 2001-2006 by Charles Maier Associates; * Licensed under the Internet Software Consortium License; * *--------------------------------------------------------------------*/ static char * discard (char * string, unsigned * lineno) { * string++ = (char) (0); string = advance (string, lineno); return (string); } /*====================================================================* * * char * nmtoken (char * string); * * collect nmtoken as per w3c xml 1.0 specification; * * Motley Tools by Charles Maier ; * Copyright (c) 2001-2006 by Charles Maier Associates; * Licensed under the Internet Software Consortium License; * *--------------------------------------------------------------------*/ static char * nmtoken (char * string) { while (isalnum ((unsigned char)* string) || (* string == '-') || (* string == '_') || (* string == '.') || (* string == ':')) { string++; } return (string); } /*====================================================================* * * char * content (char * string, char quote, unsigned * line); * * collect literal string; discard quotes; preserve whitespace; * count newlines; * * Motley Tools by Charles Maier ; * Copyright (c) 2001-2006 by Charles Maier Associates; * Licensed under the Internet Software Consortium License; * *--------------------------------------------------------------------*/ static char * content (char * string, char quote, unsigned * lineno) { if (* string == quote) { * string++ = (char) (0); } while (* string) { if (* string == quote) { break; } if (* string++ == '\n') { (* lineno)++; } } if (* string == quote) { * string++ = (char) (0); } return (string); } /*====================================================================* * * char * collect (char * string); * * collect entity; an entity consists of non-blank characters * excluding common tag punctuation; * * Motley Tools by Charles Maier ; * Copyright (c) 2001-2006 by Charles Maier Associates; * Licensed under the Internet Software Consortium License; * *--------------------------------------------------------------------*/ static char * collect (char * string) { while (* string) { if (* string == '<') { break; } if (* string == '=') { break; } if (* string == '/') { break; } if (* string == '?') { break; } if (* string == '>') { break; } if (isspace ((unsigned char)* string)) { break; } string++; } return (string); } /*====================================================================* * * static char * comment (char * string, unsigned * line); * * collect comment; * preserve delimiters; * preserve whitespace; * count newlines; * * Motley Tools by Charles Maier ; * Copyright (c) 2001-2006 by Charles Maier Associates; * Licensed under the Internet Software Consortium License; * *--------------------------------------------------------------------*/ static char * comment (char * string, unsigned * lineno) { string++; if (* string == '-') { while (* string == '-') { string++; } while ((* string) && (* string != '-')) { while ((* string) && (* string != '-')) { if (* string == '\n') { (* lineno)++; } string++; } string++; } while (* string == '-') { string++; } } return (string); } /*====================================================================* * * char * literal (char * string, char quote, unsigned * line); * * collect literal; * preserve delimiters; * preserve whitespace; * count newlines; * * Motley Tools by Charles Maier ; * Copyright (c) 2001-2006 by Charles Maier Associates; * Licensed under the Internet Software Consortium License; * *--------------------------------------------------------------------*/ static char * literal (char * string, char quote, unsigned * lineno) { if (* string == quote) { * string++ = (char) (0); } while (* string) { if (* string == quote) { break; } if (* string == '\n') { (* lineno)++; } string++; } if (* string == quote) { * string++ = (char) (0); } return (string); } /*====================================================================* * * char * context (char * string, signed c, unsigned *line); * * collect context; * preserve delimiters; * preserve whitespace; * count newlines; * * Motley Tools by Charles Maier ; * Copyright (c) 2001-2006 by Charles Maier Associates; * Licensed under the Internet Software Consortium License; * *--------------------------------------------------------------------*/ static char * context (char * string, signed c, unsigned * lineno) { string++; while (* string) { if (* string == (char) (c)) { string++; break; } if (* string == '{') { string = context (string, '}', lineno); continue; } if (* string == '(') { string = context (string, ')', lineno); continue; } if (* string == '[') { string = context (string, ']', lineno); continue; } if ((* string == '\"') || (* string == '\'')) { string = literal (string, * string, lineno); continue; } if (* string == '\n') { (* lineno)++; } string++; } return (string); } /*====================================================================* * * void xmlscan (NODE * node); * * node.h * * Motley Tools by Charles Maier ; * Copyright (c) 2001-2006 by Charles Maier Associates; * Licensed under the Internet Software Consortium License; * *--------------------------------------------------------------------*/ signed xmlscan (NODE * node) { NODE * section = node; NODE * element; NODE * attribute; NODE * value; char prefix = (char) (0); char suffix = (char) (0); char * string = node->text; unsigned lineno = 1; if (! section) { error (1, EFAULT, "section is null"); } if (! string) { error (1, EFAULT, "string is null"); } while (* string) { if (* string == '<') { prefix = '<'; suffix = '>'; string = discard (string, & lineno); if ((* string == '/') || (* string == '?') || (* string == '!')) { prefix = * string; string = discard (string, & lineno); } element = xmlnode (section); element->line = lineno; element->type = NODE_ELEM; element->text = string; if (isalpha ((unsigned char)* string)) { string = nmtoken (string); } else if (* string == '-') { string = comment (string, & lineno); } else if (* string == '[') { string = context (string, ']', & lineno); } else { string = collect (string); } string = advance (string, & lineno); while ((* string) && (* string != '<') && (* string != '/') && (* string != '?') && (* string != '>')) { attribute = xmlnode (element); attribute->line = lineno; attribute->type = NODE_ATTR; attribute->text = string; if (isalpha (* string)) { string = nmtoken (string); } else if (* string == '-') { string = comment (string, & lineno); } else if (* string == '[') { string = context (string, ']', & lineno); } else if ((* string == '\"') || (* string == '\'')) { string = content (string, * string, & lineno); attribute->text++; } else { string = collect (string); } string = advance (string, & lineno); if (* string == '=') { string = discard (string, & lineno); value = xmlnode (attribute); value->line = lineno; value->type = NODE_VALU; value->text = string; if ((* string == '\"') || (* string == '\'')) { string = content (string, * string, & lineno); value->text++; } else { string = collect (string); } string = advance (string, & lineno); } } if ((* string == '/') || (* string == '?')) { suffix = * string; string = discard (string, & lineno); } } else if (* string == '>') { string = discard (string, & lineno); if (prefix == '!') { element->type = NODE_SGML; } else if (prefix == '?') { element->type = NODE_INST; } else if (suffix == '?') { } else if (prefix == '/') { element->type = NODE_ETAG; if (element->below) { error (1, 0, "Element on line %d has attributes or content.", element->text, element->line); } if (strcmp (section->text, element->text)) { error (1, 0, "Element <%s> on line %d teminated by on line %d", section->text, section->line, element->text, element->line); } if (section->above) { section = section->above; } } else if (suffix == '/') { } else { section = element; } } else { signed space = 0; char * output = string; NODE * segment = xmlnode (section); segment->line = lineno; segment->type = NODE_DATA; segment->text = string; while (* string) { if (* string == '<') { break; } if (isspace ((unsigned char)* string)) { string = advance (string, & lineno); space++; continue; } if (space) { * output++ = ' '; space--; } * output++ = * string++; } if (output < string) { * output = (char) (0); } } } return (0); } #endif