json_out: make json_out_finished finish buffer.
[ccan] / ccan / ttxml / ttxml.c
1 /* Licensed under GPL - see LICENSE file for details */
2
3 #include <stdlib.h>
4 #include <string.h>
5 #include <stdio.h>
6
7 #include "ttxml.h"
8
/* Size of the read buffer; may be overridden at compile time. */
#ifndef BUFFER
#define BUFFER 3264
#endif


/* Character classes, one bit each so they can be OR-ed into masks. */
#define XML_LETTER      (1 << 0)
#define XML_NUMBER      (1 << 1)
#define XML_SPACE       (1 << 2)
#define XML_SLASH       (1 << 3)
#define XML_OPEN        (1 << 4)
#define XML_EQUALS      (1 << 5)
#define XML_CLOSE       (1 << 6)
#define XML_QUOTE       (1 << 7)
#define XML_OTHER       (1 << 8)

/* Mask with every class bit set. */
#define XML_ALL 0xFFFFFFFF
25
26
/* Reader state shared by the buffer and parsing helpers in this file. */
struct XMLBUF
{
        FILE *fptr;             /* stream the buffer is refilled from */
        char *buf;              /* bytes most recently read from fptr */
        int len;                /* number of bytes in buf available to the parser */
        int read_index;         /* offset in buf of the next byte to consume */
        int eof;                /* non-zero once a short read was seen or parsing was aborted */
        int error;              /* non-zero once parsing was aborted by xml_end_file() */
};
typedef struct XMLBUF XMLBUF;
36
37
38 /* Allocate a new XmlNode */
39 static XmlNode* xml_new(char * name)
40 {
41         XmlNode * ret = malloc(sizeof(XmlNode));
42         if(!ret)return NULL;
43
44         ret->attrib = NULL;
45         ret->nattrib = 0;
46         ret->child = ret->next = NULL;
47
48         ret->name = name;
49         return ret;
50 }
51
52 /* free a previously allocated XmlNode */
53 void xml_free(XmlNode *target)
54 {
55         int i;
56         for(i=0; i<target->nattrib*2; i++)
57                 if(target->attrib[i])
58                         free(target->attrib[i]);
59
60         if(target->attrib)free(target->attrib);
61         if(target->child)xml_free(target->child);
62         if(target->next)xml_free(target->next);
63         free(target->name);
64         free(target);
65 }
66
67 /* Raise flags if we have a character of special meaning.
68  * This is where I've hidden the switch statements :-p
69  */
70 static int is_special(char item)
71 {
72         if((item >= 'a' && item <= 'z') || (item >= 'A' && item <='Z'))
73                 return XML_LETTER;
74         if( item >= '0' && item <='9' )
75                 return XML_NUMBER;
76         if( item == 0x20 || item == '\t' ||     item == 0x0D || item == 0x0A )
77                 return XML_SPACE;
78         if( item == '/' )
79                 return XML_SLASH;
80         if( item == '<' )
81                 return XML_OPEN;
82         if( item == '=' )
83                 return XML_EQUALS;
84         if( item == '>' )
85                 return XML_CLOSE;
86         if( item == '"' || item == '\'' )
87                 return XML_QUOTE;
88         return 128;
89 }
90
91 /* Refresh the buffer, if possible */
92 static void xml_read_file(XMLBUF *xml)
93 {
94         int size;
95         
96         if(xml->eof)return;
97           
98         size = fread( xml->buf, 1, xml->len, xml->fptr);
99         if( size != xml->len )
100         {
101                 xml->len = size;
102                 xml->buf[size]=0;
103                 xml->eof = 1;
104         }
105 }
106
107
108 static void xml_end_file(XMLBUF *xml)
109 {
110         xml->len = 0;
111         xml->eof = 1;
112         xml->read_index = 0 ;
113         xml->error = 1;
114 }
115
116 /* All reading of the XML buffer done through these two functions */
117 /*** read a byte without advancing the offset */
118 static char xml_peek(XMLBUF *xml)
119 {
120         return xml->buf[xml->read_index];
121 }
122
123 /*** read a byte and advance the offset */
124 static char xml_read_byte(XMLBUF *xml)
125 {
126         char ret = xml_peek(xml);
127         xml->read_index++;
128         if(xml->read_index >= xml->len)
129         {
130                 if(xml->eof)
131                 {
132                   xml->read_index = xml->len;
133                   return ret;
134                 }
135                 xml->read_index = 0 ;
136                 xml_read_file(xml);
137         }
138         return ret;
139 }
140
141
142 /* skip over bytes matching the is_special mask */
143 static void xml_skip( XMLBUF *xml, int mask)
144 {
145         while( is_special(xml_peek(xml)) & mask && !(xml->eof && xml->read_index >= xml->len) )
146                 xml_read_byte(xml);
147 }
148
149
/* character matching tests for the feed functions */

/* Quote character that ends the value currently being read. */
static char quotechar = 0;

/* Accept every byte up to the closing quotechar.  A backslash and the
 * byte that follows it are always accepted, so an escaped quote does
 * not end the value.  The escape state is kept between calls. */
static int test_quote(const char x)
{
        static int escaped=0;

        if( escaped )
        {
                /* byte following a backslash: take it verbatim */
                escaped = 0;
                return 1;
        }
        if( x == '\\' )
        {
                escaped = 1;
                return 1;
        }
        return x != quotechar;
}
164
/* Character classes (XML_* bits) that end the token currently being read. */
static int feed_mask = 0;

/* Accept a byte unless its class is part of feed_mask. */
static int test_mask(const char x)
{
        const int hit = is_special(x) & feed_mask;

        return hit == 0;
}
170
171 /*
172  * char* xml_feed(x, test)
173  *
174  * Reads as many contiguous chars that pass test() into a newly allocated
175  * string.
176  *
177  * Instead of calling xml_read_byte and flogging realloc() for each byte,
178  * it checks the buffer itself.
179 */
180 static char* xml_feed( XMLBUF *xml, int (*test)(char) )
181 {
182         int offset = xml->read_index;
183         int delta;
184         char *ret = NULL;
185         char *tmp = NULL;
186         int size = 0;
187
188         /* perform first and N middle realloc()'s */
189         while( test(xml->buf[offset]) )
190         {
191                 offset ++;
192
193                 if(offset >= xml->len)
194                 {
195                         delta = offset - xml->read_index;
196                         tmp = realloc(ret, size + delta + 1);
197                         if(!tmp)goto xml_feed_malloc;
198                         ret = tmp;
199                         memcpy(ret+size, xml->buf + xml->read_index, delta);
200                         size += delta;
201                         ret[size]=0;
202                         if(xml->eof)return ret;
203                         xml_read_file(xml);
204                         xml->read_index = 0;
205                         offset = 0;
206                 }
207         }
208         /* perform final realloc() if needed */
209         if(offset > xml->read_index)
210         {
211                 delta = offset - xml->read_index;
212                 tmp = realloc(ret, size + delta + 1);
213                 if(!tmp)goto xml_feed_malloc;
214                 ret = tmp;
215                 memcpy(ret+size, xml->buf + xml->read_index, delta);
216                 xml->read_index = offset;
217                 size += delta;
218                 ret[size]=0;
219         }
220         return ret;
221 xml_feed_malloc:
222         free(ret);
223         xml_end_file(xml);
224         return 0;
225 }
226
227 /* this reads attributes from tags, of the form...
228  *
229  * <tag attr1="some arguments" attr2=argument>
230  *
231  * It is aware of quotes, and will allow anything inside quoted arguments
232  */
233 static void xml_read_attr(struct XMLBUF *xml, XmlNode *node)
234 {
235         int n=0;
236         char **tmp;
237
238         // how does this tag finish?
239         while(xml->len)
240         {
241                 if( is_special(xml_peek(xml)) & (XML_CLOSE | XML_SLASH) )
242                         return;
243
244                 n = ++node->nattrib;
245                 tmp = realloc(node->attrib, n * 2 * sizeof(char*) );
246                 if(!tmp)goto xml_read_attr_malloc;
247                 node->attrib = tmp;
248                 node->attrib[--n*2+1] = 0;
249                 
250                 feed_mask = XML_EQUALS | XML_SPACE | XML_CLOSE | XML_SLASH;
251                 node->attrib[n*2] = xml_feed(xml, test_mask );
252                 if( xml_peek(xml) == '=' )
253                 {
254                         xml_read_byte(xml);
255                         if( is_special(xml_peek(xml)) & XML_QUOTE )
256                         {
257                                 quotechar = xml_read_byte(xml);
258                                 node->attrib[n*2+1] = xml_feed(xml, test_quote);
259                                 xml_read_byte(xml);
260                         }
261                         else
262                         {
263                                 feed_mask = XML_SPACE | XML_CLOSE | XML_SLASH;
264                                 node->attrib[n*2+1] = xml_feed(xml, test_mask);
265                         }
266                 }
267                 xml_skip(xml, XML_SPACE);
268         }
269         return;
270 xml_read_attr_malloc:
271         xml_end_file(xml);
272 }
273
274 /* The big decision maker, is it a regular node, or a text node.
275  * If it's a node, it will check if it should have children, and if so
276  * will recurse over them.
277  * Text nodes don't have children, so no recursing.
278  */
279 static XmlNode* xml_parse(struct XMLBUF *xml)
280 {
281         int toff;
282         char **tmp;
283         char *stmp;
284         XmlNode **this, *ret = NULL;
285
286         this = &ret;
287
288         xml_skip(xml, XML_SPACE);       // skip whitespace
289         while( (xml->read_index < xml->len) || !xml->eof )
290         {
291                 switch(is_special(xml_peek(xml)))
292                 {
293                         case XML_OPEN:
294                                 xml_read_byte(xml);
295                                 if(xml_peek(xml) == '/')
296                                         return ret;             // parents close tag
297                                 // read the tag name
298                                 feed_mask = XML_SPACE | XML_SLASH | XML_CLOSE;
299                                 *this = xml_new( xml_feed(xml, test_mask));
300                                 if(xml->error)goto xml_parse_malloc;
301                                 xml_skip(xml, XML_SPACE);       // skip any whitespace
302
303                                 xml_read_attr(xml, *this);      // read attributes
304
305                                 // how does this tag finish?
306                                 switch(is_special(xml_peek(xml)))
307                                 {
308                                         case XML_CLOSE:         // child-nodes ahead
309                                                 xml_read_byte(xml);
310                                                 (*this)->child = xml_parse(xml);
311                                                 xml_skip(xml, XML_ALL ^ XML_CLOSE);
312                                                 xml_read_byte(xml);
313                                                 break;
314                                         case XML_SLASH:         // self closing tag
315                                                 xml_read_byte(xml);
316                                                 xml_read_byte(xml);
317                                                 break;
318                                 }
319                                 break;
320
321                         default:        // text node
322                                 *this = xml_new(0);
323                                 xml_skip(xml, XML_SPACE);       // skip any whitespace
324                                 feed_mask = XML_OPEN;
325                                 (*this)->nattrib=1;
326                                 tmp = malloc(sizeof(char*)*2);
327                                 if(!tmp)goto xml_parse_malloc;
328                                 (*this)->attrib = tmp;
329                                 (*this)->attrib[1] = NULL;
330                                 stmp = (*this)->attrib[0] = xml_feed(xml, test_mask);
331
332                                 /* trim the whitespace off the end of text nodes,
333                                  * by overwriting the spaces will null termination. */
334                                 toff = strlen(stmp)-1;
335                                 while( ( is_special(stmp[toff]) & XML_SPACE ) )
336                                 {
337                                         stmp[toff] = 0;
338                                         toff --;
339                                 }
340
341                                 break;
342                 }
343                 this = &(*this)->next;
344                 xml_skip(xml, XML_SPACE);       // skip whitespace
345         }
346
347         return ret;
348 xml_parse_malloc:
349         xml_end_file(xml);
350         if(ret)xml_free(ret);
351         return 0;
352 }
353
354
355 /* bootstrap the structures for xml_parse() to be able to get started */
356 XmlNode* xml_load(const char * filename)
357 {
358         struct XMLBUF xml;
359         XmlNode *ret = NULL;
360
361 //      printf("xml_load(\"%s\");\n", filename);
362
363         xml.error = 0;
364         xml.eof = 0;
365         xml.read_index = 0;
366         xml.fptr = fopen(filename, "rb");
367         if(!xml.fptr)
368                 return NULL;
369
370         xml.buf = malloc(BUFFER+1);
371         if(!xml.buf)
372                 goto xml_load_fail_malloc_buf;
373         xml.buf[BUFFER]=0;
374         xml.len = BUFFER;
375         
376         xml_read_file(&xml);
377
378         ret = xml_parse(&xml);
379
380         if(xml.error)
381         {
382                 xml_free(ret);
383                 ret = NULL;
384         }
385
386         free(xml.buf);
387 xml_load_fail_malloc_buf:
388         fclose(xml.fptr);
389         return ret;
390 }
391
392 /* very basic function that will get you the first node with a given name */
393 XmlNode * xml_find(XmlNode *xml, const char *name)
394 {
395         XmlNode * ret;
396         if(xml->name)if(!strcmp(xml->name, name))return xml;
397         if(xml->child)
398         {
399                 ret = xml_find(xml->child, name);
400                 if(ret)return ret;
401         }
402         if(xml->next)
403         {
404                 ret = xml_find(xml->next, name);
405                 if(ret)return ret;
406         }
407         return NULL;
408 }
409
410 /* very basic attribute lookup function */
411 char* xml_attr(XmlNode *x, const char *name)
412 {
413         int i;
414         for(i=0; i<x->nattrib; i++)
415                 if(x->attrib[i*2])
416                         if(!strcmp(x->attrib[i*2], name))
417                                 return x->attrib[i*2+1];
418         return 0;
419 }
420
421