ffda34b832621d480203ee64fa60c0291700b117
[ccan] / ccan / ttxml / ttxml.c
1 /* Licensed under GPL - see LICENSE file for details */
2
3 #include <stdlib.h>
4 #include <string.h>
5 #include <stdio.h>
6
7 #include "ttxml.h"
8
/* Number of bytes requested per read of the input file; may be
 * overridden on the compiler command line. */
#if !defined(BUFFER)
#  define BUFFER 3264
#endif
12
13
/* Character-class flags. Each class owns a single bit so that several
 * classes can be OR'ed together into a mask. */
#define XML_LETTER      0x001
#define XML_NUMBER      0x002
#define XML_SPACE       0x004
#define XML_SLASH       0x008
#define XML_OPEN        0x010
#define XML_EQUALS      0x020
#define XML_CLOSE       0x040
#define XML_QUOTE       0x080
#define XML_OTHER       0x100

/* Mask with every bit set; XOR a flag out of it to match "all but". */
#define XML_ALL 0xFFFFFFFF
25
26
/* Parser state: an input stream, a window of bytes read from it, and a
 * cursor into that window. */
typedef struct XMLBUF {
        FILE *fptr;             /* stream the window is filled from */
        char *buf;              /* the window itself */
        int len;                /* number of valid bytes in buf */
        int read_index;         /* offset in buf of the next byte to hand out */
        int eof;                /* non-zero once no further data will be read */
        int error;              /* non-zero after xml_end_file() aborted parsing */
} XMLBUF;
36
37
38 /* Allocate a new XmlNode */
39 static XmlNode* xml_new(char * name)
40 {
41         XmlNode * ret = malloc(sizeof(XmlNode));
42         if(!ret)return NULL;
43
44         ret->attrib = NULL;
45         ret->nattrib = 0;
46         ret->child = ret->next = NULL;
47
48         ret->name = name;
49         return ret;
50 }
51
52 /* free a previously allocated XmlNode */
53 void xml_free(XmlNode *target)
54 {
55         int i;
56         for(i=0; i<target->nattrib*2; i++)
57                 if(target->attrib[i])
58                         free(target->attrib[i]);
59
60         if(target->attrib)free(target->attrib);
61         if(target->child)xml_free(target->child);
62         if(target->next)xml_free(target->next);
63         free(target->name);
64         free(target);
65 }
66
67 /* Raise flags if we have a character of special meaning.
68  * This is where I've hidden the switch statements :-p
69  */
70 static int is_special(char item)
71 {
72         if((item >= 'a' && item <= 'z') || (item >= 'A' && item <='Z'))
73                 return XML_LETTER;
74         if( item >= '0' && item <='9' )
75                 return XML_NUMBER;
76         if( item == 0x20 || item == '\t' ||     item == 0x0D || item == 0x0A )
77                 return XML_SPACE;
78         if( item == '/' )
79                 return XML_SLASH;
80         if( item == '<' )
81                 return XML_OPEN;
82         if( item == '=' )
83                 return XML_EQUALS;
84         if( item == '>' )
85                 return XML_CLOSE;
86         if( item == '"' || item == '\'' )
87                 return XML_QUOTE;
88         return 128;
89 }
90
91 /* Refresh the buffer, if possible */
92 static void xml_read_file(XMLBUF *xml)
93 {
94         int size;
95         
96         if(xml->eof)return;
97           
98         size = fread( xml->buf, 1, xml->len, xml->fptr);
99         if( size != xml->len )
100         {
101                 xml->len = size;
102                 xml->buf[size]=0;
103                 xml->eof = 1;
104         }
105 }
106
107
108 static void xml_end_file(XMLBUF *xml)
109 {
110         xml->len = 0;
111         xml->eof = 1;
112         xml->read_index = 0 ;
113         xml->error = 1;
114 }
115
116 /* All reading of the XML buffer done through these two functions */
117 /*** read a byte without advancing the offset */
118 static char xml_peek(XMLBUF *xml)
119 {
120         return xml->buf[xml->read_index];
121 }
122
123 /*** read a byte and advance the offset */
124 static char xml_read_byte(XMLBUF *xml)
125 {
126         char ret = xml_peek(xml);
127         xml->read_index++;
128         if(xml->read_index >= xml->len)
129         {
130                 if(xml->eof)
131                 {
132                   xml->read_index = xml->len;
133                   return ret;
134                 }
135                 xml->read_index = 0 ;
136                 xml_read_file(xml);
137         }
138         return ret;
139 }
140
141
142 /* skip over bytes matching the is_special mask */
143 static void xml_skip( XMLBUF *xml, int mask)
144 {
145         while( is_special(xml_peek(xml)) & mask && !(xml->eof && xml->read_index >= xml->len) )
146                 xml_read_byte(xml);
147 }
148
149
/* Predicates handed to xml_feed(): each returns non-zero while the byte
 * still belongs to the token being collected. */

/* Quote character that ends the string test_quote() is scanning. */
static char quotechar = 0;

/*
 * Accept every byte up to an unescaped `quotechar`.
 * A backslash is accepted and makes the byte after it accepted as well,
 * whatever that byte is; the pending-escape flag is kept between calls.
 */
static int test_quote(const char x)
{
        static int escaped=0;

        if (escaped)
        {
                /* byte following a backslash: taken literally */
                escaped = 0;
                return 1;
        }
        if (x == '\\')
        {
                escaped = 1;
                return 1;
        }
        return x != quotechar;
}
164
/* Character classes that end the current token; callers set this before
 * passing test_mask to xml_feed(). */
static int feed_mask = 0;

/* Accept a byte unless its class is listed in feed_mask. */
static int test_mask(const char x)
{
        const int hit = is_special(x) & feed_mask;

        return hit == 0;
}
170
171 /*
172  * char* xml_feed(x, test)
173  *
174  * Reads as many contiguous chars that pass test() into a newly allocated
175  * string.
176  *
177  * Instead of calling xml_read_byte and flogging realloc() for each byte,
178  * it checks the buffer itself.
179 */
180 static char* xml_feed( XMLBUF *xml, int (*test)(char) )
181 {
182         int offset = xml->read_index;
183         int delta;
184         char *ret = NULL;
185         char *tmp = NULL;
186         int size = 0;
187
188         /* perform first and N middle realloc()'s */
189         while( test(xml->buf[offset]) )
190         {
191                 offset ++;
192
193                 if(offset >= xml->len)
194                 {
195                         delta = offset - xml->read_index;
196                         tmp = realloc(ret, size + delta + 1);
197                         if(!tmp)goto xml_feed_malloc;
198                         ret = tmp;
199                         memcpy(ret+size, xml->buf + xml->read_index, delta);
200                         size += delta;
201                         ret[size]=0;
202                         if(xml->eof)return ret;
203                         xml_read_file(xml);
204                         xml->read_index = 0;
205                         offset = 0;
206                 }
207         }
208         /* perform final realloc() if needed */
209         if(offset > xml->read_index)
210         {
211                 delta = offset - xml->read_index;
212                 tmp = realloc(ret, size + delta + 1);
213                 if(!tmp)goto xml_feed_malloc;
214                 ret = tmp;
215                 memcpy(ret+size, xml->buf + xml->read_index, delta);
216                 xml->read_index = offset;
217                 size += delta;
218                 ret[size]=0;
219         }
220         return ret;
221 xml_feed_malloc:
222         free(ret);
223         xml_end_file(xml);
224         return 0;
225 }
226
227 /* this reads attributes from tags, of the form...
228  *
229  * <tag attr1="some arguments" attr2=argument>
230  *
231  * It is aware of quotes, and will allow anything inside quoted arguments
232  */
233 static void xml_read_attr(struct XMLBUF *xml, XmlNode *node)
234 {
235         int n=0;
236         char **tmp;
237
238         // how does this tag finish?
239         while(xml->len)
240         {
241                 if( is_special(xml_peek(xml)) & (XML_CLOSE | XML_SLASH) )
242                         return;
243
244                 n = ++node->nattrib;
245                 tmp = realloc(node->attrib, n * 2 * sizeof(char*) );
246                 if(!tmp)goto xml_read_attr_malloc;
247                 node->attrib = tmp;
248                 node->attrib[--n*2+1] = 0;
249                 
250                 feed_mask = XML_EQUALS | XML_SPACE | XML_CLOSE | XML_SLASH;
251                 node->attrib[n*2] = xml_feed(xml, test_mask );
252                 if( xml_peek(xml) == '=' )
253                 {
254                         xml_read_byte(xml);
255                         if( is_special(xml_peek(xml)) & XML_QUOTE )
256                         {
257                                 quotechar = xml_read_byte(xml);
258                                 node->attrib[n*2+1] = xml_feed(xml, test_quote);
259                                 xml_read_byte(xml);
260                         }
261                         else
262                         {
263                                 feed_mask = XML_SPACE | XML_CLOSE | XML_SLASH;
264                                 node->attrib[n*2+1] = xml_feed(xml, test_mask);
265                         }
266                 }
267                 xml_skip(xml, XML_SPACE);
268         }
269         return;
270 xml_read_attr_malloc:
271         xml_end_file(xml);
272 }
273
274 /* The big decision maker, is it a regular node, or a text node.
275  * If it's a node, it will check if it should have children, and if so
276  * will recurse over them.
277  * Text nodes don't have children, so no recursing.
278  */
279 static XmlNode* xml_parse(struct XMLBUF *xml)
280 {
281         int offset;
282         int toff;
283         char **tmp;
284         char *stmp;
285         XmlNode **this, *ret = NULL;
286         
287         this = &ret;
288
289         xml_skip(xml, XML_SPACE);       // skip whitespace
290         offset=0;
291         while( (xml->read_index < xml->len) || !xml->eof )
292         {
293                 switch(is_special(xml_peek(xml)))
294                 {
295                         case XML_OPEN:
296                                 xml_read_byte(xml);
297                                 if(xml_peek(xml) == '/')
298                                         return ret;             // parents close tag
299                                 // read the tag name
300                                 feed_mask = XML_SPACE | XML_SLASH | XML_CLOSE;
301                                 *this = xml_new( xml_feed(xml, test_mask));
302                                 if(xml->error)goto xml_parse_malloc;
303                                 xml_skip(xml, XML_SPACE);       // skip any whitespace
304
305                                 xml_read_attr(xml, *this);      // read attributes
306
307                                 // how does this tag finish?
308                                 switch(is_special(xml_peek(xml)))
309                                 {
310                                         case XML_CLOSE:         // child-nodes ahead
311                                                 xml_read_byte(xml);
312                                                 (*this)->child = xml_parse(xml);
313                                                 xml_skip(xml, XML_ALL ^ XML_CLOSE);
314                                                 xml_read_byte(xml);
315                                                 break;
316                                         case XML_SLASH:         // self closing tag
317                                                 xml_read_byte(xml);
318                                                 xml_read_byte(xml);
319                                                 break;
320                                 }
321                                 break;
322
323                         default:        // text node
324                                 *this = xml_new(0);
325                                 xml_skip(xml, XML_SPACE);       // skip any whitespace
326                                 feed_mask = XML_OPEN;
327                                 (*this)->nattrib=1;
328                                 tmp = malloc(sizeof(char*)*2);
329                                 if(!tmp)goto xml_parse_malloc;
330                                 (*this)->attrib = tmp;
331                                 (*this)->attrib[1] = NULL;
332                                 stmp = (*this)->attrib[0] = xml_feed(xml, test_mask);
333
334                                 /* trim the whitespace off the end of text nodes,
335                                  * by overwriting the spaces will null termination. */
336                                 toff = strlen(stmp)-1;
337                                 while( ( is_special(stmp[toff]) & XML_SPACE ) )
338                                 {
339                                         stmp[toff] = 0;
340                                         toff --;
341                                 }
342
343                                 break;
344                 }
345                 this = &(*this)->next;
346                 xml_skip(xml, XML_SPACE);       // skip whitespace
347         }
348
349         return ret;
350 xml_parse_malloc:
351         xml_end_file(xml);
352         if(ret)xml_free(ret);
353         return 0;
354 }
355
356
357 /* bootstrap the structures for xml_parse() to be able to get started */
358 XmlNode* xml_load(const char * filename)
359 {
360         struct XMLBUF xml;
361         XmlNode *ret = NULL;
362
363 //      printf("xml_load(\"%s\");\n", filename);
364
365         xml.error = 0;
366         xml.eof = 0;
367         xml.read_index = 0;
368         xml.fptr = fopen(filename, "rb");
369         if(!xml.fptr)
370                 return NULL;
371
372         xml.buf = malloc(BUFFER+1);
373         if(!xml.buf)
374                 goto xml_load_fail_malloc_buf;
375         xml.buf[BUFFER]=0;
376         xml.len = BUFFER;
377         
378         xml_read_file(&xml);
379
380         ret = xml_parse(&xml);
381
382         if(xml.error)
383         {
384                 xml_free(ret);
385                 ret = NULL;
386         }
387
388         free(xml.buf);
389 xml_load_fail_malloc_buf:
390         fclose(xml.fptr);
391         return ret;
392 }
393
394 /* very basic function that will get you the first node with a given name */
395 XmlNode * xml_find(XmlNode *xml, const char *name)
396 {
397         XmlNode * ret;
398         if(xml->name)if(!strcmp(xml->name, name))return xml;
399         if(xml->child)
400         {
401                 ret = xml_find(xml->child, name);
402                 if(ret)return ret;
403         }
404         if(xml->next)
405         {
406                 ret = xml_find(xml->next, name);
407                 if(ret)return ret;
408         }
409         return NULL;
410 }
411
412 /* very basic attribute lookup function */
413 char* xml_attr(XmlNode *x, const char *name)
414 {
415         int i;
416         for(i=0; i<x->nattrib; i++)
417                 if(x->attrib[i*2])
418                         if(!strcmp(x->attrib[i*2], name))
419                                 return x->attrib[i*2+1];
420         return 0;
421 }
422
423