]> git.ozlabs.org Git - ccan/blob - ccan/ttxml/ttxml.c
4e1cbbd4d2bac4841fcc0a631570ef9cbde95742
[ccan] / ccan / ttxml / ttxml.c
1 /* Licensed under GPL - see LICENSE file for details */\r
2 \r
3 #include <stdlib.h>\r
4 #include <string.h>\r
5 #include <stdio.h>\r
6 \r
7 #include "ttxml.h"\r
8 \r
9 \r
/* Number of bytes requested from the file on each buffer refill */
#define BUFFER 3264

/* Character class flags; every class is a distinct bit so that several of
 * them can be OR-ed together into a mask. */
#define XML_LETTER	(1 << 0)
#define XML_NUMBER	(1 << 1)
#define XML_SPACE	(1 << 2)
#define XML_SLASH	(1 << 3)
#define XML_OPEN	(1 << 4)
#define XML_EQUALS	(1 << 5)
#define XML_CLOSE	(1 << 6)
#define XML_QUOTE	(1 << 7)
#define XML_OTHER	(1 << 8)

/* Mask with every bit set, i.e. matching all character classes */
#define XML_ALL 0xFFFFFFFF

23 \r
24 \r
/* Reader state shared by all of the parsing helpers in this file */
typedef struct XMLBUF
{
	FILE *fptr;		/* stream the buffer is refilled from */
	char *buf;		/* chunk of file data currently being parsed */
	int len;		/* number of valid bytes in buf */
	int read_index;		/* offset in buf of the next byte to read */
	int eof;		/* non-zero once a refill came up short */
} XMLBUF;
33 \r
34 \r
35 /* Allocate a new XmlNode */\r
36 XmlNode* xml_new(char * name)\r
37 {\r
38         XmlNode * ret = malloc(sizeof(XmlNode));\r
39         if(!ret)return NULL;\r
40 \r
41         ret->attrib = NULL;\r
42         ret->nattrib = 0;\r
43         ret->child = ret->next = NULL;\r
44 \r
45         ret->name = name;\r
46         return ret;\r
47 }\r
48 \r
49 /* free a previously allocated XmlNode */\r
50 void xml_free(XmlNode *target)\r
51 {\r
52         int i;\r
53         for(i=0; i<target->nattrib*2; i++)\r
54                 if(target->attrib[i])\r
55                         free(target->attrib[i]);\r
56 \r
57         if(target->attrib)free(target->attrib);\r
58         if(target->child)xml_free(target->child);\r
59         if(target->next)xml_free(target->next);\r
60         free(target->name);\r
61         free(target);\r
62 }\r
63 \r
64 /* Raise flags if we have a character of special meaning.\r
65  * This is where I've hidden the switch statements :-p\r
66  */\r
67 static int is_special(char item)\r
68 {\r
69         if((item >= 'a' && item <= 'z') || (item >= 'A' && item <='Z'))\r
70                 return XML_LETTER;\r
71         if( item >= '0' && item <='9' )\r
72                 return XML_NUMBER;\r
73         if( item == 0x20 || item == '\t' ||     item == 0x0D || item == 0x0A )\r
74                 return XML_SPACE;\r
75         if( item == '/' )\r
76                 return XML_SLASH;\r
77         if( item == '<' )\r
78                 return XML_OPEN;\r
79         if( item == '=' )\r
80                 return XML_EQUALS;\r
81         if( item == '>' )\r
82                 return XML_CLOSE;\r
83         if( item == '"' || item == '\'' )\r
84                 return XML_QUOTE;\r
85         return 128;\r
86 }\r
87 \r
88 /* Refresh the buffer, if possible */\r
89 static void xml_read_file(XMLBUF *xml)\r
90 {\r
91         int size;\r
92         \r
93         if(xml->eof)return;\r
94           \r
95         size = fread( xml->buf, 1, xml->len, xml->fptr);\r
96         if( size != xml->len )\r
97         {\r
98                 printf("Buffer reduction\n");\r
99                 xml->len = size;\r
100                 xml->buf[size]=0;\r
101                 xml->eof = 1;\r
102         }\r
103 }\r
104 \r
105 \r
106 /* All reading of the XML buffer done through these two functions */\r
107 /*** read a byte without advancing the offset */\r
108 static char xml_peek(XMLBUF *xml)\r
109 {\r
110         return xml->buf[xml->read_index];\r
111 }\r
112 \r
113 /*** read a byte and advance the offset */\r
114 static char xml_read_byte(XMLBUF *xml)\r
115 {\r
116         char ret = xml_peek(xml);\r
117         xml->read_index++;\r
118         if(xml->read_index >= xml->len)\r
119         {\r
120                 if(xml->eof)\r
121                 {\r
122                   xml->read_index = xml->len;\r
123                   return ret;\r
124                 }\r
125                 xml->read_index = 0 ;\r
126                 xml_read_file(xml);\r
127         }\r
128         return ret;\r
129 }\r
130 \r
131 \r
132 /* skip over bytes matching the is_special mask */\r
133 static void xml_skip( XMLBUF *xml, int mask)\r
134 {\r
135         while( is_special(xml_peek(xml)) & mask && !(xml->eof && xml->read_index >= xml->len) )\r
136                 xml_read_byte(xml);\r
137 }\r
138 \r
139 \r
/* character matching tests for the feed functions */

/* The quote character that ends the value test_quote() is scanning */
static char quotechar = 0;

/* Returns 1 while x belongs to the quoted value and 0 on the closing quote.
 * A backslash and the character right after it always count as part of the
 * value; that escape state is remembered between calls. */
static int test_quote(const char x)
{
	static int in_escape = 0;

	if( !in_escape && x != '\\' )
		return x != quotechar;

	/* either the backslash itself or the character it escapes */
	in_escape = !in_escape;
	return 1;
}
154 \r
/* Character classes that end a run scanned with test_mask() */
static int feed_mask = 0;

/* Returns 1 while the class of x is not part of feed_mask, 0 otherwise */
static int test_mask(const char x)
{
	int stopper = is_special(x) & feed_mask;

	return stopper == 0;
}
160 \r
161 /*\r
162  * char* xml_feed(x, test)\r
163  *\r
164  * Reads as many contiguous chars that pass test() into a newly allocated\r
165  * string.\r
166  *\r
167  * Instead of calling xml_read_byte and flogging realloc() for each byte,\r
168  * it checks the buffer itself.\r
169 */\r
170 static char* xml_feed( XMLBUF *xml, int (*test)(char) )\r
171 {\r
172         int offset = xml->read_index;\r
173         int delta;\r
174         char *ret = NULL;\r
175         int size = 0;\r
176 \r
177         /* perform first and N middle realloc()'s */\r
178         while( test(xml->buf[offset]) )\r
179         {\r
180                 offset ++;\r
181 \r
182                 if(offset >= xml->len)\r
183                 {\r
184                         delta = offset - xml->read_index;\r
185                         ret = realloc(ret, size + delta + 1);\r
186                         memcpy(ret+size, xml->buf + xml->read_index, delta);\r
187                         size += delta;\r
188                         ret[size]=0;\r
189                         if(xml->eof)return ret;\r
190                         xml_read_file(xml);\r
191                         xml->read_index = 0;\r
192                         offset = 0;\r
193                 }\r
194         }\r
195         /* perform final realloc() if needed */\r
196         if(offset > xml->read_index)\r
197         {\r
198                 delta = offset - xml->read_index;\r
199                 ret = realloc(ret, size + delta + 1);\r
200                 memcpy(ret+size, xml->buf + xml->read_index, delta);\r
201                 xml->read_index = offset;\r
202                 size += delta;\r
203                 ret[size]=0;\r
204         }\r
205         return ret;\r
206 }\r
207 \r
208 /* this reads attributes from tags, of the form...\r
209  *\r
210  * <tag attr1="some arguments" attr2=argument>\r
211  *\r
212  * It is aware of quotes, and will allow anything inside quoted arguments\r
213  */\r
214 static void xml_read_attr(struct XMLBUF *xml, XmlNode *node)\r
215 {\r
216         int n=0;\r
217 \r
218         // how does this tag finish?\r
219         while(xml->len)\r
220         {\r
221                 if( is_special(xml_peek(xml)) & (XML_CLOSE | XML_SLASH) )\r
222                         return;\r
223 \r
224                 n = ++node->nattrib;\r
225                 node->attrib = realloc(node->attrib, n * 2 * sizeof(char*) );\r
226                 node->attrib[--n*2+1] = 0;\r
227                 \r
228                 feed_mask = XML_EQUALS | XML_SPACE | XML_CLOSE | XML_SLASH;\r
229                 node->attrib[n*2] = xml_feed(xml, test_mask );\r
230                 if( xml_peek(xml) == '=' )\r
231                 {\r
232                         xml_read_byte(xml);\r
233                         if( is_special(xml_peek(xml)) & XML_QUOTE )\r
234                         {\r
235                                 quotechar = xml_read_byte(xml);\r
236                                 node->attrib[n*2+1] = xml_feed(xml, test_quote);\r
237                                 xml_read_byte(xml);\r
238                         }\r
239                         else\r
240                         {\r
241                                 feed_mask = XML_SPACE | XML_CLOSE | XML_SLASH;\r
242                                 node->attrib[n*2+1] = xml_feed(xml, test_mask);\r
243                         }\r
244                 }\r
245                 xml_skip(xml, XML_SPACE);\r
246         }\r
247 }\r
248 \r
249 /* The big decision maker, is it a regular node, or a text node.\r
250  * If it's a node, it will check if it should have children, and if so\r
251  * will recurse over them.\r
252  * Text nodes don't have children, so no recursing.\r
253  */\r
254 static XmlNode* xml_parse(struct XMLBUF *xml)\r
255 {\r
256         int offset;\r
257         int toff;\r
258         char *tmp;\r
259         XmlNode **this, *ret = NULL;\r
260         \r
261         this = &ret;\r
262 \r
263         xml_skip(xml, XML_SPACE);       // skip whitespace\r
264         offset=0;\r
265         while( (xml->read_index < xml->len) || !xml->eof )\r
266         {\r
267                 switch(is_special(xml_peek(xml)))\r
268                 {\r
269                         case XML_OPEN:\r
270                                 xml_read_byte(xml);\r
271                                 if(xml_peek(xml) == '/')\r
272                                         return ret;             // parents close tag\r
273                                 // read the tag name\r
274                                 feed_mask = XML_SPACE | XML_SLASH | XML_CLOSE;\r
275                                 *this = xml_new( xml_feed(xml, test_mask));\r
276                                 xml_skip(xml, XML_SPACE);       // skip any whitespace\r
277 \r
278                                 xml_read_attr(xml, *this);      // read attributes\r
279 \r
280                                 // how does this tag finish?\r
281                                 switch(is_special(xml_peek(xml)))\r
282                                 {\r
283                                         case XML_CLOSE:         // child-nodes ahead\r
284                                                 xml_read_byte(xml);\r
285                                                 (*this)->child = xml_parse(xml);\r
286                                                 xml_skip(xml, XML_ALL ^ XML_CLOSE);\r
287                                                 xml_read_byte(xml);\r
288                                                 break;\r
289                                         case XML_SLASH:         // self closing tag\r
290                                                 xml_read_byte(xml);\r
291                                                 xml_read_byte(xml);\r
292                                                 break;\r
293                                 }\r
294                                 break;\r
295 \r
296                         default:        // text node\r
297                                 *this = xml_new(0);\r
298                                 xml_skip(xml, XML_SPACE);       // skip any whitespace\r
299                                 feed_mask = XML_OPEN;\r
300                                 (*this)->nattrib=1;\r
301                                 (*this)->attrib = malloc(sizeof(char*)*2);\r
302                                 (*this)->attrib[1] = NULL;\r
303                                 tmp = (*this)->attrib[0] = xml_feed(xml, test_mask);\r
304 \r
305                                 /* trim the whitespace off the end of text nodes,\r
306                                  * by overwriting the spaces will null termination. */\r
307                                 toff = strlen(tmp)-1;\r
308                                 while( ( is_special(tmp[toff]) & XML_SPACE ) )\r
309                                 {\r
310                                         tmp[toff] = 0;\r
311                                         toff --;\r
312                                 }\r
313 \r
314                                 break;\r
315                 }\r
316                 this = &(*this)->next; \r
317                 xml_skip(xml, XML_SPACE);       // skip whitespace\r
318         }       \r
319 \r
320         return ret;\r
321 }\r
322 \r
323 \r
324 /* bootstrap the structures for xml_parse() to be able to get started */\r
325 XmlNode* xml_load(const char * filename)\r
326 {\r
327         struct XMLBUF xml;\r
328         XmlNode *ret = NULL;\r
329 \r
330 //      printf("xml_load(\"%s\");\n", filename);\r
331 \r
332         xml.eof = 0;\r
333         xml.read_index = 0;\r
334         xml.fptr = fopen(filename, "rb");\r
335         if(!xml.fptr)\r
336         {\r
337                 printf("Opening file failed\n");\r
338                 return NULL;\r
339         }\r
340 \r
341         xml.buf = malloc(BUFFER+1);\r
342         xml.buf[BUFFER]=0;\r
343         xml.len = BUFFER;\r
344         if(!xml.buf)\r
345                 goto xml_load_fail_malloc_buf;\r
346         \r
347         xml_read_file(&xml);\r
348 \r
349         ret = xml_parse(&xml);\r
350 \r
351         free(xml.buf);\r
352 xml_load_fail_malloc_buf:\r
353         fclose(xml.fptr);\r
354         return ret;\r
355 }\r
356 \r
357 /* very basic function that will get you the first node with a given name */\r
358 XmlNode * xml_find(XmlNode *xml, const char *name)\r
359 {\r
360         XmlNode * ret;\r
361         if(xml->name)if(!strcmp(xml->name, name))return xml;\r
362         if(xml->child)\r
363         {\r
364                 ret = xml_find(xml->child, name);\r
365                 if(ret)return ret;\r
366         }\r
367         if(xml->next)\r
368         {\r
369                 ret = xml_find(xml->next, name);\r
370                 if(ret)return ret;\r
371         }\r
372         return NULL;\r
373 }\r
374 \r
375 /* very basic attribute lookup function */\r
376 char* xml_attr(XmlNode *x, const char *name)\r
377 {\r
378         int i;\r
379         for(i=0; i<x->nattrib; i++)\r
380                 if(x->attrib[i*2])\r
381                         if(!strcmp(x->attrib[i*2], name))\r
382                                 return x->attrib[i*2+1];\r
383         return 0;\r
384 }\r
385 \r
386 \r
387 #ifdef TEST\r
388 /* print out the heirarchy of an XML file, useful for debugging */\r
389 void xp(XmlNode *x, int level, int max)\r
390 {\r
391         int i;\r
392         char text[] = "text";\r
393         char *name = text;\r
394         if(level > max)return;\r
395         if(!x)return;\r
396         if(x->name)name = x->name;\r
397         for(i=0; i<level; i++)printf("    ");\r
398         printf("%s:", name);\r
399         if(x->name)\r
400         for(i=0; i<x->nattrib; i++)\r
401                 printf("%s=\"%s\",", x->attrib[i*2], x->attrib[i*2+1]);\r
402         else printf("%s", x->attrib[0]);\r
403         printf("\n");\r
404         if(x->child)xp(x->child, level+1, max);\r
405         if(x->next)xp(x->next, level, max);\r
406 }\r
407 \r
408 \r
409 int main(int argc, char *argv[])\r
410 {\r
411         XmlNode *x, *tmp;\r
412         \r
413         if(!argv[1])\r
414         {\r
415                 printf("USAGE: %s name\n\t reads name where name is an XML file.\n",\r
416                                 argv[0]);\r
417                 return 1;\r
418         }\r
419 \r
420 #ifdef PROFILE\r
421         for(int i=0; i<1000; i++)\r
422         {\r
423 #endif\r
424                 x = xml_load(argv[1]);\r
425 \r
426                 if(!x)\r
427                 {\r
428                         printf("Failed to load.\n");\r
429                         return 2;\r
430                 }\r
431 #ifndef PROFILE\r
432                 xp(x, 1, 20);\r
433 #endif\r
434                 xml_free(x);\r
435 #ifdef PROFILE\r
436         }\r
437 #endif\r
438 \r
439         return 0;\r
440 }\r
441 #endif\r
442 \r