ttxml: modified it to use a smaller buffer during testing
[ccan] / ccan / ttxml / ttxml.c
1 /* Licensed under GPL - see LICENSE file for details */\r
2 \r
3 #include <stdlib.h>\r
4 #include <string.h>\r
5 #include <stdio.h>\r
6 \r
7 #include "ttxml.h"\r
8 \r
/* Size in bytes of the file read buffer. A definition of BUFFER supplied
 * before this point (for example on the compiler command line) wins over
 * the default below. */
#if !defined(BUFFER)
#  define BUFFER 3264
#endif
12 \r
13 \r
/* Character class bits used by is_special() and by the skip/feed masks.
 * Every class occupies its own bit so that classes can be OR-ed together. */
#define XML_LETTER	(1 << 0)
#define XML_NUMBER	(1 << 1)
#define XML_SPACE	(1 << 2)
#define XML_SLASH	(1 << 3)
#define XML_OPEN	(1 << 4)
#define XML_EQUALS	(1 << 5)
#define XML_CLOSE	(1 << 6)
#define XML_QUOTE	(1 << 7)
#define XML_OTHER	(1 << 8)

/* Mask with all 32 low bits set, i.e. matching every class above. */
#define XML_ALL 0xFFFFFFFF
25 \r
26 \r
/* State of the buffered reader that sits between the file and the parser. */
struct XMLBUF
{
	FILE *fptr;		/* stream the document is read from */
	char *buf;		/* most recently read chunk of the file */
	int len;		/* number of bytes of buf currently in use */
	int read_index;		/* offset in buf of the next byte to hand out */
	int eof;		/* non-zero once a short read has been seen */
};

typedef struct XMLBUF XMLBUF;
35 \r
36 \r
37 /* Allocate a new XmlNode */\r
38 static XmlNode* xml_new(char * name)\r
39 {\r
40         XmlNode * ret = malloc(sizeof(XmlNode));\r
41         if(!ret)return NULL;\r
42 \r
43         ret->attrib = NULL;\r
44         ret->nattrib = 0;\r
45         ret->child = ret->next = NULL;\r
46 \r
47         ret->name = name;\r
48         return ret;\r
49 }\r
50 \r
51 /* free a previously allocated XmlNode */\r
52 void xml_free(XmlNode *target)\r
53 {\r
54         int i;\r
55         for(i=0; i<target->nattrib*2; i++)\r
56                 if(target->attrib[i])\r
57                         free(target->attrib[i]);\r
58 \r
59         if(target->attrib)free(target->attrib);\r
60         if(target->child)xml_free(target->child);\r
61         if(target->next)xml_free(target->next);\r
62         free(target->name);\r
63         free(target);\r
64 }\r
65 \r
66 /* Raise flags if we have a character of special meaning.\r
67  * This is where I've hidden the switch statements :-p\r
68  */\r
69 static int is_special(char item)\r
70 {\r
71         if((item >= 'a' && item <= 'z') || (item >= 'A' && item <='Z'))\r
72                 return XML_LETTER;\r
73         if( item >= '0' && item <='9' )\r
74                 return XML_NUMBER;\r
75         if( item == 0x20 || item == '\t' ||     item == 0x0D || item == 0x0A )\r
76                 return XML_SPACE;\r
77         if( item == '/' )\r
78                 return XML_SLASH;\r
79         if( item == '<' )\r
80                 return XML_OPEN;\r
81         if( item == '=' )\r
82                 return XML_EQUALS;\r
83         if( item == '>' )\r
84                 return XML_CLOSE;\r
85         if( item == '"' || item == '\'' )\r
86                 return XML_QUOTE;\r
87         return 128;\r
88 }\r
89 \r
90 /* Refresh the buffer, if possible */\r
91 static void xml_read_file(XMLBUF *xml)\r
92 {\r
93         int size;\r
94         \r
95         if(xml->eof)return;\r
96           \r
97         size = fread( xml->buf, 1, xml->len, xml->fptr);\r
98         if( size != xml->len )\r
99         {\r
100                 xml->len = size;\r
101                 xml->buf[size]=0;\r
102                 xml->eof = 1;\r
103         }\r
104 }\r
105 \r
106 \r
/* Single-byte reads of the XML buffer go through these two functions
 * (xml_feed() scans the buffer directly). */
108 /*** read a byte without advancing the offset */\r
109 static char xml_peek(XMLBUF *xml)\r
110 {\r
111         return xml->buf[xml->read_index];\r
112 }\r
113 \r
114 /*** read a byte and advance the offset */\r
115 static char xml_read_byte(XMLBUF *xml)\r
116 {\r
117         char ret = xml_peek(xml);\r
118         xml->read_index++;\r
119         if(xml->read_index >= xml->len)\r
120         {\r
121                 if(xml->eof)\r
122                 {\r
123                   xml->read_index = xml->len;\r
124                   return ret;\r
125                 }\r
126                 xml->read_index = 0 ;\r
127                 xml_read_file(xml);\r
128         }\r
129         return ret;\r
130 }\r
131 \r
132 \r
133 /* skip over bytes matching the is_special mask */\r
134 static void xml_skip( XMLBUF *xml, int mask)\r
135 {\r
136         while( is_special(xml_peek(xml)) & mask && !(xml->eof && xml->read_index >= xml->len) )\r
137                 xml_read_byte(xml);\r
138 }\r
139 \r
140 \r
/* Character tests that are handed to xml_feed(). */

/* Quote character that terminates the value test_quote() is scanning. */
static char quotechar = 0;

/* Return 1 while x still belongs to the quoted value and 0 when x is the
 * closing quote. A backslash is accepted and makes the character after it
 * accepted unconditionally; that state is kept in a static between calls. */
static int test_quote(const char x)
{
	static int escaped = 0;

	if (escaped) {
		/* character following a backslash: always part of the value */
		escaped = 0;
		return 1;
	}
	if (x == '\\') {
		escaped = 1;
		return 1;
	}
	return x != quotechar;
}
155 \r
/* Class bits that end the token test_mask() is scanning. */
static int feed_mask = 0;

/* Return 1 while x is in none of the classes listed in feed_mask,
 * 0 as soon as it is in one of them. */
static int test_mask(const char x)
{
	int stop_bits = is_special(x) & feed_mask;

	return stop_bits == 0;
}
161 \r
162 /*\r
163  * char* xml_feed(x, test)\r
164  *\r
165  * Reads as many contiguous chars that pass test() into a newly allocated\r
166  * string.\r
167  *\r
168  * Instead of calling xml_read_byte and flogging realloc() for each byte,\r
169  * it checks the buffer itself.\r
170 */\r
171 static char* xml_feed( XMLBUF *xml, int (*test)(char) )\r
172 {\r
173         int offset = xml->read_index;\r
174         int delta;\r
175         char *ret = NULL;\r
176         int size = 0;\r
177 \r
178         /* perform first and N middle realloc()'s */\r
179         while( test(xml->buf[offset]) )\r
180         {\r
181                 offset ++;\r
182 \r
183                 if(offset >= xml->len)\r
184                 {\r
185                         delta = offset - xml->read_index;\r
186                         ret = realloc(ret, size + delta + 1);\r
187                         memcpy(ret+size, xml->buf + xml->read_index, delta);\r
188                         size += delta;\r
189                         ret[size]=0;\r
190                         if(xml->eof)return ret;\r
191                         xml_read_file(xml);\r
192                         xml->read_index = 0;\r
193                         offset = 0;\r
194                 }\r
195         }\r
196         /* perform final realloc() if needed */\r
197         if(offset > xml->read_index)\r
198         {\r
199                 delta = offset - xml->read_index;\r
200                 ret = realloc(ret, size + delta + 1);\r
201                 memcpy(ret+size, xml->buf + xml->read_index, delta);\r
202                 xml->read_index = offset;\r
203                 size += delta;\r
204                 ret[size]=0;\r
205         }\r
206         return ret;\r
207 }\r
208 \r
209 /* this reads attributes from tags, of the form...\r
210  *\r
211  * <tag attr1="some arguments" attr2=argument>\r
212  *\r
213  * It is aware of quotes, and will allow anything inside quoted arguments\r
214  */\r
215 static void xml_read_attr(struct XMLBUF *xml, XmlNode *node)\r
216 {\r
217         int n=0;\r
218 \r
219         // how does this tag finish?\r
220         while(xml->len)\r
221         {\r
222                 if( is_special(xml_peek(xml)) & (XML_CLOSE | XML_SLASH) )\r
223                         return;\r
224 \r
225                 n = ++node->nattrib;\r
226                 node->attrib = realloc(node->attrib, n * 2 * sizeof(char*) );\r
227                 node->attrib[--n*2+1] = 0;\r
228                 \r
229                 feed_mask = XML_EQUALS | XML_SPACE | XML_CLOSE | XML_SLASH;\r
230                 node->attrib[n*2] = xml_feed(xml, test_mask );\r
231                 if( xml_peek(xml) == '=' )\r
232                 {\r
233                         xml_read_byte(xml);\r
234                         if( is_special(xml_peek(xml)) & XML_QUOTE )\r
235                         {\r
236                                 quotechar = xml_read_byte(xml);\r
237                                 node->attrib[n*2+1] = xml_feed(xml, test_quote);\r
238                                 xml_read_byte(xml);\r
239                         }\r
240                         else\r
241                         {\r
242                                 feed_mask = XML_SPACE | XML_CLOSE | XML_SLASH;\r
243                                 node->attrib[n*2+1] = xml_feed(xml, test_mask);\r
244                         }\r
245                 }\r
246                 xml_skip(xml, XML_SPACE);\r
247         }\r
248 }\r
249 \r
250 /* The big decision maker, is it a regular node, or a text node.\r
251  * If it's a node, it will check if it should have children, and if so\r
252  * will recurse over them.\r
253  * Text nodes don't have children, so no recursing.\r
254  */\r
255 static XmlNode* xml_parse(struct XMLBUF *xml)\r
256 {\r
257         int offset;\r
258         int toff;\r
259         char *tmp;\r
260         XmlNode **this, *ret = NULL;\r
261         \r
262         this = &ret;\r
263 \r
264         xml_skip(xml, XML_SPACE);       // skip whitespace\r
265         offset=0;\r
266         while( (xml->read_index < xml->len) || !xml->eof )\r
267         {\r
268                 switch(is_special(xml_peek(xml)))\r
269                 {\r
270                         case XML_OPEN:\r
271                                 xml_read_byte(xml);\r
272                                 if(xml_peek(xml) == '/')\r
273                                         return ret;             // parents close tag\r
274                                 // read the tag name\r
275                                 feed_mask = XML_SPACE | XML_SLASH | XML_CLOSE;\r
276                                 *this = xml_new( xml_feed(xml, test_mask));\r
277                                 xml_skip(xml, XML_SPACE);       // skip any whitespace\r
278 \r
279                                 xml_read_attr(xml, *this);      // read attributes\r
280 \r
281                                 // how does this tag finish?\r
282                                 switch(is_special(xml_peek(xml)))\r
283                                 {\r
284                                         case XML_CLOSE:         // child-nodes ahead\r
285                                                 xml_read_byte(xml);\r
286                                                 (*this)->child = xml_parse(xml);\r
287                                                 xml_skip(xml, XML_ALL ^ XML_CLOSE);\r
288                                                 xml_read_byte(xml);\r
289                                                 break;\r
290                                         case XML_SLASH:         // self closing tag\r
291                                                 xml_read_byte(xml);\r
292                                                 xml_read_byte(xml);\r
293                                                 break;\r
294                                 }\r
295                                 break;\r
296 \r
297                         default:        // text node\r
298                                 *this = xml_new(0);\r
299                                 xml_skip(xml, XML_SPACE);       // skip any whitespace\r
300                                 feed_mask = XML_OPEN;\r
301                                 (*this)->nattrib=1;\r
302                                 (*this)->attrib = malloc(sizeof(char*)*2);\r
303                                 (*this)->attrib[1] = NULL;\r
304                                 tmp = (*this)->attrib[0] = xml_feed(xml, test_mask);\r
305 \r
306                                 /* trim the whitespace off the end of text nodes,\r
307                                  * by overwriting the spaces will null termination. */\r
308                                 toff = strlen(tmp)-1;\r
309                                 while( ( is_special(tmp[toff]) & XML_SPACE ) )\r
310                                 {\r
311                                         tmp[toff] = 0;\r
312                                         toff --;\r
313                                 }\r
314 \r
315                                 break;\r
316                 }\r
317                 this = &(*this)->next; \r
318                 xml_skip(xml, XML_SPACE);       // skip whitespace\r
319         }       \r
320 \r
321         return ret;\r
322 }\r
323 \r
324 \r
325 /* bootstrap the structures for xml_parse() to be able to get started */\r
326 XmlNode* xml_load(const char * filename)\r
327 {\r
328         struct XMLBUF xml;\r
329         XmlNode *ret = NULL;\r
330 \r
331 //      printf("xml_load(\"%s\");\n", filename);\r
332 \r
333         xml.eof = 0;\r
334         xml.read_index = 0;\r
335         xml.fptr = fopen(filename, "rb");\r
336         if(!xml.fptr)\r
337                 return NULL;\r
338 \r
339         xml.buf = malloc(BUFFER+1);\r
340         xml.buf[BUFFER]=0;\r
341         xml.len = BUFFER;\r
342         if(!xml.buf)\r
343                 goto xml_load_fail_malloc_buf;\r
344         \r
345         xml_read_file(&xml);\r
346 \r
347         ret = xml_parse(&xml);\r
348 \r
349         free(xml.buf);\r
350 xml_load_fail_malloc_buf:\r
351         fclose(xml.fptr);\r
352         return ret;\r
353 }\r
354 \r
355 /* very basic function that will get you the first node with a given name */\r
356 XmlNode * xml_find(XmlNode *xml, const char *name)\r
357 {\r
358         XmlNode * ret;\r
359         if(xml->name)if(!strcmp(xml->name, name))return xml;\r
360         if(xml->child)\r
361         {\r
362                 ret = xml_find(xml->child, name);\r
363                 if(ret)return ret;\r
364         }\r
365         if(xml->next)\r
366         {\r
367                 ret = xml_find(xml->next, name);\r
368                 if(ret)return ret;\r
369         }\r
370         return NULL;\r
371 }\r
372 \r
373 /* very basic attribute lookup function */\r
374 char* xml_attr(XmlNode *x, const char *name)\r
375 {\r
376         int i;\r
377         for(i=0; i<x->nattrib; i++)\r
378                 if(x->attrib[i*2])\r
379                         if(!strcmp(x->attrib[i*2], name))\r
380                                 return x->attrib[i*2+1];\r
381         return 0;\r
382 }\r
383 \r
384 \r