ttxml: Replaced ratchet buffer with regular buffer, 20x speed increase
[ccan] / ccan / ttxml / ttxml.c
1 /* Licensed under GPL - see LICENSE file for details */\r
2 \r
3 #include <stdlib.h>\r
4 #include <string.h>\r
5 #include <stdio.h>\r
6 \r
7 #include "ttxml.h"\r
8 \r
9 \r
/* Number of bytes requested from the file per read; xml_load() allocates
 * one byte more than this for a terminating NUL. */
#define BUFFER 3264


/* Character classes used by is_special() and by the mask arguments of the
 * skip/feed helpers. Each class is a distinct bit so classes can be OR'ed
 * together into a mask. */
#define XML_LETTER      (1 << 0)
#define XML_NUMBER      (1 << 1)
#define XML_SPACE       (1 << 2)
#define XML_SLASH       (1 << 3)
#define XML_OPEN        (1 << 4)
#define XML_EQUALS      (1 << 5)
#define XML_CLOSE       (1 << 6)
#define XML_QUOTE       (1 << 7)
#define XML_OTHER       (1 << 8)

/* Mask with every bit set, i.e. matching every character class. */
#define XML_ALL 0xFFFFFFFF
24 \r
25 \r
/* Read state for one XML file being parsed. */
typedef struct XMLBUF
{
        FILE *fptr;             /* stream the data is read from */
        char *buf;              /* current chunk of file data */
        int   len;              /* number of valid bytes in buf */
        int   read_index;       /* offset in buf of the next byte to read */
        int   eof;              /* set once a read returned fewer bytes than asked */
} XMLBUF;
34 \r
35 \r
36 /* Allocate a new XmlNode */\r
37 XmlNode* xml_new(char * name)\r
38 {\r
39         XmlNode * ret = malloc(sizeof(XmlNode));\r
40         if(!ret)return NULL;\r
41 \r
42         ret->attrib = NULL;\r
43         ret->nattrib = 0;\r
44         ret->child = ret->next = NULL;\r
45 \r
46         ret->name = name;\r
47         return ret;\r
48 }\r
49 \r
50 /* free a previously allocated XmlNode */\r
51 void xml_free(XmlNode *target)\r
52 {\r
53         int i;\r
54         for(i=0; i<target->nattrib*2; i++)\r
55                 if(target->attrib[i])\r
56                         free(target->attrib[i]);\r
57 \r
58         if(target->attrib)free(target->attrib);\r
59         if(target->child)xml_free(target->child);\r
60         if(target->next)xml_free(target->next);\r
61         free(target->name);\r
62         free(target);\r
63 }\r
64 \r
65 /* raise flags if we have a character of special meaning\r
66  *\r
67  * This is where I've hidden the switch statements :-p\r
68  *\r
69  */\r
70 int is_special(char item)\r
71 {\r
72         if((item >= 'a' && item <= 'z') || (item >= 'A' && item <='Z'))\r
73                 return XML_LETTER;\r
74         if( item >= '0' && item <='9' )\r
75                 return XML_NUMBER;\r
76         if( item == 0x20 || item == '\t' ||     item == 0x0D || item == 0x0A )\r
77                 return XML_SPACE;\r
78         if( item == '/' )\r
79                 return XML_SLASH;\r
80         if( item == '<' )\r
81                 return XML_OPEN;\r
82         if( item == '=' )\r
83                 return XML_EQUALS;\r
84         if( item == '>' )\r
85                 return XML_CLOSE;\r
86         if( item == '"' || item == '\'' )\r
87                 return XML_QUOTE;\r
88         return 128;\r
89 }\r
90 \r
91 /* Refresh the buffer, expects not to be called when EOF */\r
92 static void xml_read_file(XMLBUF *xml)\r
93 {\r
94         int size;\r
95         \r
96         size = fread( xml->buf, 1, xml->len, xml->fptr);\r
97         if( size != xml->len )\r
98         {\r
99                 xml->len = size;\r
100                 xml->buf[size]=0;\r
101                 xml->eof = 1;\r
102         }\r
103 }\r
104 \r
105 \r
106 /* All reading of the XML buffer done through these two functions */\r
107 /*** read a byte without advancing the offset */\r
108 static char xml_peek(XMLBUF *xml)\r
109 {\r
110         return xml->buf[xml->read_index];\r
111 }\r
112 \r
113 /*** read a byte and advance the offset */\r
114 static char xml_read_byte(XMLBUF *xml)\r
115 {\r
116         char ret = xml_peek(xml);\r
117         xml->read_index++;\r
118         if(xml->read_index >= xml->len)\r
119         {\r
120                 if(xml->eof)return ret;\r
121                 xml->read_index = 0 ;\r
122                 xml_read_file(xml);\r
123         }\r
124         return ret;\r
125 }\r
126 \r
127 \r
128 /* skip over bytes matching the is_special mask */\r
129 static void xml_skip( XMLBUF *xml, int mask)\r
130 {\r
131         printf("just called\n");\r
132         while( is_special(xml_peek(xml)) & mask && xml->len )\r
133                 xml_read_byte(xml);\r
134 }\r
135 \r
136 \r
137 /* character matching tests for the feed functions */\r
/* The quote character that terminates the value currently being read. */
static char quotechar = 0;

/* Feed test for quoted values: returns 1 while `x` belongs to the value and
 * 0 when `x` is the closing quote.
 *
 * A backslash is accepted and makes the following character accepted as
 * well, whatever it is. The "previous character was a backslash" state is
 * kept in a function-local static between calls.
 */
static int test_quote(const char x)
{
        static int escaped = 0;

        if (escaped)
        {
                /* character following a backslash: always part of the value */
                escaped = 0;
                return 1;
        }
        if (x == '\\')
        {
                escaped = 1;
                return 1;
        }
        return x != quotechar;
}
151 \r
/* Character classes that end the token currently being read. */
static int feed_mask = 0;

/* Feed test for unquoted tokens: returns 1 while the class of `x` is not
 * part of feed_mask, 0 once a terminating class is seen. */
static int test_mask(const char x)
{
        int cls = is_special(x);

        return (cls & feed_mask) == 0;
}
157 \r
158 /*\r
159  * char* xml_feed(x, test)\r
160  *\r
161  * Reads as many contiguous chars that pass test() into a newly allocated\r
162  * string.\r
163  *\r
164  * Instead of calling xml_read_byte and flogging realloc() for each byte,\r
165  * it checks the buffer itself.\r
166 */\r
167 static char* xml_feed( XMLBUF *xml, int (*test)(char) )\r
168 {\r
169         int offset = xml->read_index;\r
170         int delta;\r
171         char *ret = NULL;\r
172         int size = 0;\r
173 \r
174         /* perform first and N middle realloc()'s */\r
175         while( test(xml->buf[offset]) )\r
176         {\r
177                 offset ++;\r
178 \r
179                 if(offset >= xml->len)\r
180                 {\r
181                         delta = offset - xml->read_index;\r
182                         ret = realloc(ret, size + delta + 1);\r
183                         memcpy(ret+size, xml->buf + xml->read_index, delta);\r
184                         size += delta;\r
185                         ret[size]=0;\r
186                         if(xml->eof)return ret;\r
187                         xml_read_file(xml);\r
188                         xml->read_index = 0;\r
189                         offset = 0;\r
190                 }\r
191         }\r
192         /* perform final realloc() if needed */\r
193         if(offset > xml->read_index)\r
194         {\r
195                 delta = offset - xml->read_index;\r
196                 ret = realloc(ret, size + delta + 1);\r
197                 memcpy(ret+size, xml->buf + xml->read_index, delta);\r
198                 xml->read_index = offset;\r
199                 size += delta;\r
200                 ret[size]=0;\r
201         }\r
202         return ret;\r
203 }\r
204 \r
205 /* this reads attributes from tags, of the form...\r
206  *\r
207  * <tag attr1="some arguments" attr2=argument>\r
208  *\r
209  * It is aware of quotes, and will allow anything inside quoted arguments\r
210  */\r
211 static void xml_read_attr(struct XMLBUF *xml, XmlNode *node)\r
212 {\r
213         int n=0;\r
214 \r
215         // how does this tag finish?\r
216         while(xml->len)\r
217         {\r
218                 if( is_special(xml_peek(xml)) & (XML_CLOSE | XML_SLASH) )\r
219                         return;\r
220 \r
221                 n = ++node->nattrib;\r
222                 node->attrib = realloc(node->attrib, n * 2 * sizeof(char*) );\r
223                 node->attrib[--n*2+1] = 0;\r
224                 \r
225                 feed_mask = XML_EQUALS | XML_SPACE | XML_CLOSE | XML_SLASH;\r
226                 node->attrib[n*2] = xml_feed(xml, test_mask );\r
227                 if( xml_peek(xml) == '=' )\r
228                 {\r
229                         xml_read_byte(xml);\r
230                         if( is_special(xml_peek(xml)) & XML_QUOTE )\r
231                         {\r
232                                 quotechar = xml_read_byte(xml);\r
233                                 node->attrib[n*2+1] = xml_feed(xml, test_quote);\r
234                                 xml_read_byte(xml);\r
235                         }\r
236                         else\r
237                         {\r
238                                 feed_mask = XML_SPACE | XML_CLOSE | XML_SLASH;\r
239                                 node->attrib[n*2+1] = xml_feed(xml, test_mask);\r
240                         }\r
241                 }\r
242                 xml_skip(xml, XML_SPACE);\r
243         }\r
244 }\r
245 \r
246 /* The big decision maker, is it a regular node, or a text node.\r
247  * If it's a node, it will check if it should have children, and if so\r
248  * will recurse over them.\r
249  * Text nodes don't have children, so no recursing.\r
250  */\r
251 static XmlNode* xml_parse(struct XMLBUF *xml)\r
252 {\r
253         int offset;\r
254         int toff;\r
255         char *tmp;\r
256         XmlNode **this, *ret = NULL;\r
257         \r
258         this = &ret;\r
259 \r
260         xml_skip(xml, XML_SPACE);       // skip whitespace\r
261         offset=0;\r
262         while( (xml->read_index < xml->len) || !xml->eof )\r
263         {\r
264                 switch(is_special(xml_peek(xml)))\r
265                 {\r
266                         case XML_OPEN:\r
267                                 xml_read_byte(xml);\r
268                                 if(xml_peek(xml) == '/')\r
269                                         return ret;             // parents close tag\r
270                                 // read the tag name\r
271                                 feed_mask = XML_SPACE | XML_SLASH | XML_CLOSE;\r
272                                 *this = xml_new( xml_feed(xml, test_mask));\r
273                                 xml_skip(xml, XML_SPACE);       // skip any whitespace\r
274 \r
275                                 xml_read_attr(xml, *this);      // read attributes\r
276 \r
277                                 // how does this tag finish?\r
278                                 switch(is_special(xml_peek(xml)))\r
279                                 {\r
280                                         case XML_CLOSE:         // child-nodes ahead\r
281                                                 xml_read_byte(xml);\r
282                                                 (*this)->child = xml_parse(xml);\r
283                                                 xml_skip(xml, XML_ALL ^ XML_CLOSE);\r
284                                                 xml_read_byte(xml);\r
285                                                 break;\r
286                                         case XML_SLASH:         // self closing tag\r
287                                                 xml_read_byte(xml);\r
288                                                 xml_read_byte(xml);\r
289                                                 break;\r
290                                 }\r
291                                 break;\r
292 \r
293                         default:        // text node\r
294                                 *this = xml_new(0);\r
295                                 xml_skip(xml, XML_SPACE);       // skip any whitespace\r
296                                 feed_mask = XML_OPEN;\r
297                                 (*this)->nattrib=1;\r
298                                 (*this)->attrib = malloc(sizeof(char*)*2);\r
299                                 (*this)->attrib[1] = NULL;\r
300                                 tmp = (*this)->attrib[0] = xml_feed(xml, test_mask);\r
301 \r
302                                 /* trim the whitespace off the end of text nodes,\r
303                                  * by overwriting the spaces will null termination. */\r
304                                 toff = strlen(tmp)-1;\r
305                                 while( ( is_special(tmp[toff]) & XML_SPACE ) )\r
306                                 {\r
307                                         tmp[toff] = 0;\r
308                                         toff --;\r
309                                 }\r
310 \r
311                                 break;\r
312                 }\r
313                 this = &(*this)->next; \r
314                 xml_skip(xml, XML_SPACE);       // skip whitespace\r
315         }       \r
316 \r
317         return ret;\r
318 }\r
319 \r
320 \r
321 /* bootstrap the structures for xml_parse() to be able to get started */\r
322 XmlNode* xml_load(const char * filename)\r
323 {\r
324         struct XMLBUF xml;\r
325         XmlNode *ret = NULL;\r
326 \r
327 //      printf("xml_load(\"%s\");\n", filename);\r
328 \r
329         xml.eof = 0;\r
330         xml.read_index = 0;\r
331         xml.fptr = fopen(filename, "rb");\r
332         if(!xml.fptr)\r
333         {\r
334                 printf("Opening file failed\n");\r
335                 return NULL;\r
336         }\r
337 \r
338         xml.buf = malloc(BUFFER+1);\r
339         xml.buf[BUFFER]=0;\r
340         if(!xml.buf)\r
341                 goto xml_load_fail_malloc_buf;\r
342         \r
343         xml.len = fread(xml.buf, 1, BUFFER, xml.fptr);\r
344         if(xml.len < BUFFER)\r
345                 xml.eof = 1;\r
346 \r
347         ret = xml_parse(&xml);\r
348 \r
349         free(xml.buf);\r
350 xml_load_fail_malloc_buf:\r
351         fclose(xml.fptr);\r
352         return ret;\r
353 }\r
354 \r
355 /* very basic function that will get you the first node with a given name */\r
356 XmlNode * xml_find(XmlNode *xml, const char *name)\r
357 {\r
358         XmlNode * ret;\r
359         if(xml->name)if(!strcmp(xml->name, name))return xml;\r
360         if(xml->child)\r
361         {\r
362                 ret = xml_find(xml->child, name);\r
363                 if(ret)return ret;\r
364         }\r
365         if(xml->next)\r
366         {\r
367                 ret = xml_find(xml->next, name);\r
368                 if(ret)return ret;\r
369         }\r
370         return NULL;\r
371 }\r
372 \r
373 /* very basic attribute lookup function */\r
374 char* xml_attr(XmlNode *x, const char *name)\r
375 {\r
376         int i;\r
377         for(i=0; i<x->nattrib; i++)\r
378                 if(x->attrib[i*2])\r
379                         if(!strcmp(x->attrib[i*2], name))\r
380                                 return x->attrib[i*2+1];\r
381         return 0;\r
382 }\r
383 \r
384 \r
385 #ifdef TEST\r
386 /* print out the heirarchy of an XML file, useful for debugging */\r
387 void xp(XmlNode *x, int level, int max)\r
388 {\r
389         int i;\r
390         char text[] = "text";\r
391         char *name = text;\r
392         if(level > max)return;\r
393         if(!x)return;\r
394         if(x->name)name = x->name;\r
395         for(i=0; i<level; i++)printf("    ");\r
396         printf("%s:", name);\r
397         if(x->name)\r
398         for(i=0; i<x->nattrib; i++)\r
399                 printf("%s=\"%s\",", x->attrib[i*2], x->attrib[i*2+1]);\r
400         else printf("%s", x->attrib[0]);\r
401         printf("\n");\r
402         if(x->child)xp(x->child, level+1, max);\r
403         if(x->next)xp(x->next, level, max);\r
404 }\r
405 \r
406 \r
407 int main(int argc, char *argv[])\r
408 {\r
409         XmlNode *x, *tmp;\r
410         int i;\r
411 \r
412         if(!argv[1])\r
413         {\r
414                 printf("USAGE: %s name\n\t reads name where name is an XML file.\n",\r
415                                 argv[0]);\r
416                 return 1;\r
417         }\r
418 \r
419 #ifdef PROFILE\r
420         for(i=0; i<1000; i++)\r
421         {\r
422 #endif\r
423                 x = xml_load(argv[1]);\r
424 \r
425                 if(!x)\r
426                 {\r
427                         printf("Failed to load.\n");\r
428                         return 2;\r
429                 }\r
430 #ifndef PROFILE\r
431                 xp(x, 1, 20);\r
432 #endif\r
433                 xml_free(x);\r
434 #ifdef PROFILE\r
435         }\r
436 #endif\r
437 \r
438         \r
439 //      tmp = xml_find(x, "geometry");\r
440 //      xp(x, 1, 6);\r
441 //      printf("Happily free.\n");\r
442         return 0;\r
443 }\r
444 #endif\r
445 \r