]> git.ozlabs.org Git - ccan/blob - ccan/ttxml/ttxml.c
03b101d6e8f951362c7cdacaf9a5a58e01bae15d
[ccan] / ccan / ttxml / ttxml.c
1 /* Licensed under GPL - see LICENSE file for details */
2
3 #include <stdlib.h>
4 #include <string.h>
5 #include <stdio.h>
6
7 #include "ttxml.h"
8
/* Size in bytes of the file read buffer; a build may predefine BUFFER to
 * override it. */
#if !defined(BUFFER)
#define BUFFER 3264
#endif


/* Character classes, one bit each so several can be OR-ed into a mask. */
#define XML_LETTER      (1 << 0)
#define XML_NUMBER      (1 << 1)
#define XML_SPACE       (1 << 2)
#define XML_SLASH       (1 << 3)
#define XML_OPEN        (1 << 4)
#define XML_EQUALS      (1 << 5)
#define XML_CLOSE       (1 << 6)
#define XML_QUOTE       (1 << 7)
#define XML_OTHER       (1 << 8)

/* Mask with every bit set. */
#define XML_ALL 0xFFFFFFFF
25
26
/* Read state for one input file: a byte buffer plus a cursor into it. */
struct XMLBUF
{
        FILE *fptr;             /* stream the buffer is refilled from */
        char *buf;              /* buffered bytes */
        int len;                /* bytes requested per refill; shrinks to the
                                 * count actually read on a short read */
        int read_index;         /* offset in buf of the next byte to read */
        int eof;                /* non-zero once a short read has been seen */
};

typedef struct XMLBUF XMLBUF;
35
36
37 /* Allocate a new XmlNode */
38 static XmlNode* xml_new(char * name)
39 {
40         XmlNode * ret = malloc(sizeof(XmlNode));
41         if(!ret)return NULL;
42
43         ret->attrib = NULL;
44         ret->nattrib = 0;
45         ret->child = ret->next = NULL;
46
47         ret->name = name;
48         return ret;
49 }
50
51 /* free a previously allocated XmlNode */
52 void xml_free(XmlNode *target)
53 {
54         int i;
55         for(i=0; i<target->nattrib*2; i++)
56                 if(target->attrib[i])
57                         free(target->attrib[i]);
58
59         if(target->attrib)free(target->attrib);
60         if(target->child)xml_free(target->child);
61         if(target->next)xml_free(target->next);
62         free(target->name);
63         free(target);
64 }
65
66 /* Raise flags if we have a character of special meaning.
67  * This is where I've hidden the switch statements :-p
68  */
69 static int is_special(char item)
70 {
71         if((item >= 'a' && item <= 'z') || (item >= 'A' && item <='Z'))
72                 return XML_LETTER;
73         if( item >= '0' && item <='9' )
74                 return XML_NUMBER;
75         if( item == 0x20 || item == '\t' ||     item == 0x0D || item == 0x0A )
76                 return XML_SPACE;
77         if( item == '/' )
78                 return XML_SLASH;
79         if( item == '<' )
80                 return XML_OPEN;
81         if( item == '=' )
82                 return XML_EQUALS;
83         if( item == '>' )
84                 return XML_CLOSE;
85         if( item == '"' || item == '\'' )
86                 return XML_QUOTE;
87         return 128;
88 }
89
90 /* Refresh the buffer, if possible */
91 static void xml_read_file(XMLBUF *xml)
92 {
93         int size;
94         
95         if(xml->eof)return;
96           
97         size = fread( xml->buf, 1, xml->len, xml->fptr);
98         if( size != xml->len )
99         {
100                 xml->len = size;
101                 xml->buf[size]=0;
102                 xml->eof = 1;
103         }
104 }
105
106
107 /* All reading of the XML buffer done through these two functions */
108 /*** read a byte without advancing the offset */
109 static char xml_peek(XMLBUF *xml)
110 {
111         return xml->buf[xml->read_index];
112 }
113
114 /*** read a byte and advance the offset */
115 static char xml_read_byte(XMLBUF *xml)
116 {
117         char ret = xml_peek(xml);
118         xml->read_index++;
119         if(xml->read_index >= xml->len)
120         {
121                 if(xml->eof)
122                 {
123                   xml->read_index = xml->len;
124                   return ret;
125                 }
126                 xml->read_index = 0 ;
127                 xml_read_file(xml);
128         }
129         return ret;
130 }
131
132
133 /* skip over bytes matching the is_special mask */
134 static void xml_skip( XMLBUF *xml, int mask)
135 {
136         while( is_special(xml_peek(xml)) & mask && !(xml->eof && xml->read_index >= xml->len) )
137                 xml_read_byte(xml);
138 }
139
140
141 /* character matching tests for the feed functions */
/* Delimiter that ends the quoted string currently being read. */
static char quotechar = 0;

/* Feed test for quoted strings: returns 1 while `x` belongs to the string
 * and 0 on the closing quotechar.  A backslash, and the character that
 * follows it, are always accepted; the pending-escape flag is kept between
 * calls. */
static int test_quote(const char x)
{
        static int escaped=0;
        int part_of_escape = escaped || x == '\\';

        if( part_of_escape )
        {
                escaped = !escaped;
                return 1;
        }
        return x != quotechar;
}
155
/* Character classes that terminate a test_mask() feed. */
static int feed_mask = 0;

/* Feed test: returns 1 while the class of `x` is outside feed_mask,
 * 0 as soon as it is inside. */
static int test_mask(const char x)
{
        int klass = is_special(x);

        return (klass & feed_mask) == 0;
}
161
162 /*
163  * char* xml_feed(x, test)
164  *
165  * Reads as many contiguous chars that pass test() into a newly allocated
166  * string.
167  *
168  * Instead of calling xml_read_byte and flogging realloc() for each byte,
169  * it checks the buffer itself.
170 */
171 static char* xml_feed( XMLBUF *xml, int (*test)(char) )
172 {
173         int offset = xml->read_index;
174         int delta;
175         char *ret = NULL;
176         int size = 0;
177
178         /* perform first and N middle realloc()'s */
179         while( test(xml->buf[offset]) )
180         {
181                 offset ++;
182
183                 if(offset >= xml->len)
184                 {
185                         delta = offset - xml->read_index;
186                         ret = realloc(ret, size + delta + 1);
187                         memcpy(ret+size, xml->buf + xml->read_index, delta);
188                         size += delta;
189                         ret[size]=0;
190                         if(xml->eof)return ret;
191                         xml_read_file(xml);
192                         xml->read_index = 0;
193                         offset = 0;
194                 }
195         }
196         /* perform final realloc() if needed */
197         if(offset > xml->read_index)
198         {
199                 delta = offset - xml->read_index;
200                 ret = realloc(ret, size + delta + 1);
201                 memcpy(ret+size, xml->buf + xml->read_index, delta);
202                 xml->read_index = offset;
203                 size += delta;
204                 ret[size]=0;
205         }
206         return ret;
207 }
208
209 /* this reads attributes from tags, of the form...
210  *
211  * <tag attr1="some arguments" attr2=argument>
212  *
213  * It is aware of quotes, and will allow anything inside quoted arguments
214  */
215 static void xml_read_attr(struct XMLBUF *xml, XmlNode *node)
216 {
217         int n=0;
218
219         // how does this tag finish?
220         while(xml->len)
221         {
222                 if( is_special(xml_peek(xml)) & (XML_CLOSE | XML_SLASH) )
223                         return;
224
225                 n = ++node->nattrib;
226                 node->attrib = realloc(node->attrib, n * 2 * sizeof(char*) );
227                 node->attrib[--n*2+1] = 0;
228                 
229                 feed_mask = XML_EQUALS | XML_SPACE | XML_CLOSE | XML_SLASH;
230                 node->attrib[n*2] = xml_feed(xml, test_mask );
231                 if( xml_peek(xml) == '=' )
232                 {
233                         xml_read_byte(xml);
234                         if( is_special(xml_peek(xml)) & XML_QUOTE )
235                         {
236                                 quotechar = xml_read_byte(xml);
237                                 node->attrib[n*2+1] = xml_feed(xml, test_quote);
238                                 xml_read_byte(xml);
239                         }
240                         else
241                         {
242                                 feed_mask = XML_SPACE | XML_CLOSE | XML_SLASH;
243                                 node->attrib[n*2+1] = xml_feed(xml, test_mask);
244                         }
245                 }
246                 xml_skip(xml, XML_SPACE);
247         }
248 }
249
250 /* The big decision maker, is it a regular node, or a text node.
251  * If it's a node, it will check if it should have children, and if so
252  * will recurse over them.
253  * Text nodes don't have children, so no recursing.
254  */
255 static XmlNode* xml_parse(struct XMLBUF *xml)
256 {
257         int offset;
258         int toff;
259         char *tmp;
260         XmlNode **this, *ret = NULL;
261         
262         this = &ret;
263
264         xml_skip(xml, XML_SPACE);       // skip whitespace
265         offset=0;
266         while( (xml->read_index < xml->len) || !xml->eof )
267         {
268                 switch(is_special(xml_peek(xml)))
269                 {
270                         case XML_OPEN:
271                                 xml_read_byte(xml);
272                                 if(xml_peek(xml) == '/')
273                                         return ret;             // parents close tag
274                                 // read the tag name
275                                 feed_mask = XML_SPACE | XML_SLASH | XML_CLOSE;
276                                 *this = xml_new( xml_feed(xml, test_mask));
277                                 xml_skip(xml, XML_SPACE);       // skip any whitespace
278
279                                 xml_read_attr(xml, *this);      // read attributes
280
281                                 // how does this tag finish?
282                                 switch(is_special(xml_peek(xml)))
283                                 {
284                                         case XML_CLOSE:         // child-nodes ahead
285                                                 xml_read_byte(xml);
286                                                 (*this)->child = xml_parse(xml);
287                                                 xml_skip(xml, XML_ALL ^ XML_CLOSE);
288                                                 xml_read_byte(xml);
289                                                 break;
290                                         case XML_SLASH:         // self closing tag
291                                                 xml_read_byte(xml);
292                                                 xml_read_byte(xml);
293                                                 break;
294                                 }
295                                 break;
296
297                         default:        // text node
298                                 *this = xml_new(0);
299                                 xml_skip(xml, XML_SPACE);       // skip any whitespace
300                                 feed_mask = XML_OPEN;
301                                 (*this)->nattrib=1;
302                                 (*this)->attrib = malloc(sizeof(char*)*2);
303                                 (*this)->attrib[1] = NULL;
304                                 tmp = (*this)->attrib[0] = xml_feed(xml, test_mask);
305
306                                 /* trim the whitespace off the end of text nodes,
307                                  * by overwriting the spaces will null termination. */
308                                 toff = strlen(tmp)-1;
309                                 while( ( is_special(tmp[toff]) & XML_SPACE ) )
310                                 {
311                                         tmp[toff] = 0;
312                                         toff --;
313                                 }
314
315                                 break;
316                 }
317                 this = &(*this)->next; 
318                 xml_skip(xml, XML_SPACE);       // skip whitespace
319         }       
320
321         return ret;
322 }
323
324
325 /* bootstrap the structures for xml_parse() to be able to get started */
326 XmlNode* xml_load(const char * filename)
327 {
328         struct XMLBUF xml;
329         XmlNode *ret = NULL;
330
331 //      printf("xml_load(\"%s\");\n", filename);
332
333         xml.eof = 0;
334         xml.read_index = 0;
335         xml.fptr = fopen(filename, "rb");
336         if(!xml.fptr)
337                 return NULL;
338
339         xml.buf = malloc(BUFFER+1);
340         xml.buf[BUFFER]=0;
341         xml.len = BUFFER;
342         if(!xml.buf)
343                 goto xml_load_fail_malloc_buf;
344         
345         xml_read_file(&xml);
346
347         ret = xml_parse(&xml);
348
349         free(xml.buf);
350 xml_load_fail_malloc_buf:
351         fclose(xml.fptr);
352         return ret;
353 }
354
355 /* very basic function that will get you the first node with a given name */
356 XmlNode * xml_find(XmlNode *xml, const char *name)
357 {
358         XmlNode * ret;
359         if(xml->name)if(!strcmp(xml->name, name))return xml;
360         if(xml->child)
361         {
362                 ret = xml_find(xml->child, name);
363                 if(ret)return ret;
364         }
365         if(xml->next)
366         {
367                 ret = xml_find(xml->next, name);
368                 if(ret)return ret;
369         }
370         return NULL;
371 }
372
373 /* very basic attribute lookup function */
374 char* xml_attr(XmlNode *x, const char *name)
375 {
376         int i;
377         for(i=0; i<x->nattrib; i++)
378                 if(x->attrib[i*2])
379                         if(!strcmp(x->attrib[i*2], name))
380                                 return x->attrib[i*2+1];
381         return 0;
382 }
383
384