1 /* Licensed under GPL - see LICENSE file for details */
\r
13 #define XML_LETTER 1
\r
14 #define XML_NUMBER 2
\r
18 #define XML_EQUALS 32
\r
19 #define XML_CLOSE 64
\r
20 #define XML_QUOTE 128
\r
21 #define XML_OTHER 256
\r
23 #define XML_ALL 0xFFFFFFFF
\r
26 typedef struct XMLBUF
\r
36 /* Allocate a new XmlNode */
\r
37 XmlNode* xml_new(char * name)
\r
39 XmlNode * ret = malloc(sizeof(XmlNode));
\r
40 if(!ret)return NULL;
\r
44 ret->child = ret->next = NULL;
\r
50 /* free a previously allocated XmlNode */
\r
51 void xml_free(XmlNode *target)
\r
54 for(i=0; i<target->nattrib*2; i++)
\r
55 if(target->attrib[i])
\r
56 free(target->attrib[i]);
\r
58 if(target->attrib)free(target->attrib);
\r
59 if(target->child)xml_free(target->child);
\r
60 if(target->next)xml_free(target->next);
\r
65 /* raise flags if we have a character of special meaning
\r
67 * This is where I've hidden the switch statements :-p
\r
70 int is_special(char item)
\r
72 if((item >= 'a' && item <= 'z') || (item >= 'A' && item <='Z'))
\r
74 if( item >= '0' && item <='9' )
\r
76 if( item == 0x20 || item == '\t' || item == 0x0D || item == 0x0A )
\r
86 if( item == '"' || item == '\'' )
\r
91 /* Refresh the buffer, expects not to be called when EOF */
\r
92 static void xml_read_file(XMLBUF *xml)
\r
96 size = fread( xml->buf, 1, xml->len, xml->fptr);
\r
97 if( size != xml->len )
\r
106 /* All reading of the XML buffer done through these two functions */
\r
107 /*** read a byte without advancing the offset */
\r
108 static char xml_peek(XMLBUF *xml)
\r
110 return xml->buf[xml->read_index];
\r
113 /*** read a byte and advance the offset */
\r
114 static char xml_read_byte(XMLBUF *xml)
\r
116 char ret = xml_peek(xml);
\r
118 if(xml->read_index >= xml->len)
\r
120 if(xml->eof)return ret;
\r
121 xml->read_index = 0 ;
\r
122 xml_read_file(xml);
\r
128 /* skip over bytes matching the is_special mask */
\r
129 static void xml_skip( XMLBUF *xml, int mask)
\r
131 printf("just called\n");
\r
132 while( is_special(xml_peek(xml)) & mask && xml->len )
\r
133 xml_read_byte(xml);
\r
137 /* character matching tests for the feed functions */
\r
138 static char quotechar = 0;
\r
139 static int test_quote(const char x)
\r
141 static int escaped=0;
\r
142 if( escaped || '\\' == x )
\r
144 escaped = !escaped;
\r
147 if( x != quotechar )
\r
152 static int feed_mask = 0;
\r
153 static int test_mask(const char x)
\r
155 return !(is_special(x) & feed_mask);
\r
159 * char* xml_feed(x, test)
\r
161 * Reads as many contiguous chars that pass test() into a newly allocated
\r
164 * Instead of calling xml_read_byte and flogging realloc() for each byte,
\r
165 * it checks the buffer itself.
\r
167 static char* xml_feed( XMLBUF *xml, int (*test)(char) )
\r
169 int offset = xml->read_index;
\r
174 /* perform first and N middle realloc()'s */
\r
175 while( test(xml->buf[offset]) )
\r
179 if(offset >= xml->len)
\r
181 delta = offset - xml->read_index;
\r
182 ret = realloc(ret, size + delta + 1);
\r
183 memcpy(ret+size, xml->buf + xml->read_index, delta);
\r
186 if(xml->eof)return ret;
\r
187 xml_read_file(xml);
\r
188 xml->read_index = 0;
\r
192 /* perform final realloc() if needed */
\r
193 if(offset > xml->read_index)
\r
195 delta = offset - xml->read_index;
\r
196 ret = realloc(ret, size + delta + 1);
\r
197 memcpy(ret+size, xml->buf + xml->read_index, delta);
\r
198 xml->read_index = offset;
\r
205 /* this reads attributes from tags, of the form...
\r
207 * <tag attr1="some arguments" attr2=argument>
\r
209 * It is aware of quotes, and will allow anything inside quoted arguments
\r
211 static void xml_read_attr(struct XMLBUF *xml, XmlNode *node)
\r
215 // how does this tag finish?
\r
218 if( is_special(xml_peek(xml)) & (XML_CLOSE | XML_SLASH) )
\r
221 n = ++node->nattrib;
\r
222 node->attrib = realloc(node->attrib, n * 2 * sizeof(char*) );
\r
223 node->attrib[--n*2+1] = 0;
\r
225 feed_mask = XML_EQUALS | XML_SPACE | XML_CLOSE | XML_SLASH;
\r
226 node->attrib[n*2] = xml_feed(xml, test_mask );
\r
227 if( xml_peek(xml) == '=' )
\r
229 xml_read_byte(xml);
\r
230 if( is_special(xml_peek(xml)) & XML_QUOTE )
\r
232 quotechar = xml_read_byte(xml);
\r
233 node->attrib[n*2+1] = xml_feed(xml, test_quote);
\r
234 xml_read_byte(xml);
\r
238 feed_mask = XML_SPACE | XML_CLOSE | XML_SLASH;
\r
239 node->attrib[n*2+1] = xml_feed(xml, test_mask);
\r
242 xml_skip(xml, XML_SPACE);
\r
246 /* The big decision maker, is it a regular node, or a text node.
\r
247 * If it's a node, it will check if it should have children, and if so
\r
248 * will recurse over them.
\r
249 * Text nodes don't have children, so no recursing.
\r
251 static XmlNode* xml_parse(struct XMLBUF *xml)
\r
256 XmlNode **this, *ret = NULL;
\r
260 xml_skip(xml, XML_SPACE); // skip whitespace
\r
262 while( (xml->read_index < xml->len) || !xml->eof )
\r
264 switch(is_special(xml_peek(xml)))
\r
267 xml_read_byte(xml);
\r
268 if(xml_peek(xml) == '/')
\r
269 return ret; // parents close tag
\r
270 // read the tag name
\r
271 feed_mask = XML_SPACE | XML_SLASH | XML_CLOSE;
\r
272 *this = xml_new( xml_feed(xml, test_mask));
\r
273 xml_skip(xml, XML_SPACE); // skip any whitespace
\r
275 xml_read_attr(xml, *this); // read attributes
\r
277 // how does this tag finish?
\r
278 switch(is_special(xml_peek(xml)))
\r
280 case XML_CLOSE: // child-nodes ahead
\r
281 xml_read_byte(xml);
\r
282 (*this)->child = xml_parse(xml);
\r
283 xml_skip(xml, XML_ALL ^ XML_CLOSE);
\r
284 xml_read_byte(xml);
\r
286 case XML_SLASH: // self closing tag
\r
287 xml_read_byte(xml);
\r
288 xml_read_byte(xml);
\r
293 default: // text node
\r
294 *this = xml_new(0);
\r
295 xml_skip(xml, XML_SPACE); // skip any whitespace
\r
296 feed_mask = XML_OPEN;
\r
297 (*this)->nattrib=1;
\r
298 (*this)->attrib = malloc(sizeof(char*)*2);
\r
299 (*this)->attrib[1] = NULL;
\r
300 tmp = (*this)->attrib[0] = xml_feed(xml, test_mask);
\r
302 /* trim the whitespace off the end of text nodes,
\r
303 * by overwriting the spaces with null termination. */
\r
304 toff = strlen(tmp)-1;
\r
305 while( ( is_special(tmp[toff]) & XML_SPACE ) )
\r
313 this = &(*this)->next;
\r
314 xml_skip(xml, XML_SPACE); // skip whitespace
\r
321 /* bootstrap the structures for xml_parse() to be able to get started */
\r
322 XmlNode* xml_load(const char * filename)
\r
325 XmlNode *ret = NULL;
\r
327 // printf("xml_load(\"%s\");\n", filename);
\r
330 xml.read_index = 0;
\r
331 xml.fptr = fopen(filename, "rb");
\r
334 printf("Opening file failed\n");
\r
338 xml.buf = malloc(BUFFER+1);
\r
341 goto xml_load_fail_malloc_buf;
\r
343 xml.len = fread(xml.buf, 1, BUFFER, xml.fptr);
\r
344 if(xml.len < BUFFER)
\r
347 ret = xml_parse(&xml);
\r
350 xml_load_fail_malloc_buf:
\r
355 /* very basic function that will get you the first node with a given name */
\r
356 XmlNode * xml_find(XmlNode *xml, const char *name)
\r
359 if(xml->name)if(!strcmp(xml->name, name))return xml;
\r
362 ret = xml_find(xml->child, name);
\r
367 ret = xml_find(xml->next, name);
\r
373 /* very basic attribute lookup function */
\r
374 char* xml_attr(XmlNode *x, const char *name)
\r
377 for(i=0; i<x->nattrib; i++)
\r
379 if(!strcmp(x->attrib[i*2], name))
\r
380 return x->attrib[i*2+1];
\r
386 /* print out the hierarchy of an XML file, useful for debugging */
\r
387 void xp(XmlNode *x, int level, int max)
\r
390 char text[] = "text";
\r
392 if(level > max)return;
\r
394 if(x->name)name = x->name;
\r
395 for(i=0; i<level; i++)printf(" ");
\r
396 printf("%s:", name);
\r
398 for(i=0; i<x->nattrib; i++)
\r
399 printf("%s=\"%s\",", x->attrib[i*2], x->attrib[i*2+1]);
\r
400 else printf("%s", x->attrib[0]);
\r
402 if(x->child)xp(x->child, level+1, max);
\r
403 if(x->next)xp(x->next, level, max);
\r
407 int main(int argc, char *argv[])
\r
414 printf("USAGE: %s name\n\t reads name where name is an XML file.\n",
\r
420 for(i=0; i<1000; i++)
\r
423 x = xml_load(argv[1]);
\r
427 printf("Failed to load.\n");
\r
439 // tmp = xml_find(x, "geometry");
\r
441 // printf("Happily free.\n");
\r