ttxml: Rplaced ratchet buffer with regular buffer, 20x speed increase
authorDaniel Burke <dan.p.burke@gmail.com>
Mon, 5 Sep 2011 01:10:52 +0000 (10:40 +0930)
committerDaniel Burke <dan.p.burke@gmail.com>
Mon, 5 Sep 2011 01:10:52 +0000 (10:40 +0930)
ccan/ttxml/ttxml.c
ccan/ttxml/ttxml.h

index 80c34841297b109c049ebb68f40bee10bd8090f8..791ced7cd4e65273d4eea06027acc2f8b74d24f1 100644 (file)
@@ -1,4 +1,5 @@
 /* Licensed under GPL - see LICENSE file for details */\r
+\r
 #include <stdlib.h>\r
 #include <string.h>\r
 #include <stdio.h>\r
@@ -6,7 +7,34 @@
 #include "ttxml.h"\r
 \r
 \r
-XmlNode* xml_new(char * name, char * attrib)\r
+#define BUFFER 3264\r
+\r
+\r
+#define XML_LETTER     1\r
+#define XML_NUMBER     2\r
+#define XML_SPACE      4\r
+#define XML_SLASH      8\r
+#define XML_OPEN       16\r
+#define XML_EQUALS     32\r
+#define XML_CLOSE      64\r
+#define XML_QUOTE      128\r
+#define XML_OTHER      256\r
+\r
+#define XML_ALL 0xFFFFFFFF\r
+\r
+\r
+typedef struct XMLBUF\r
+{\r
+       FILE * fptr;\r
+       char * buf;\r
+       int len;\r
+       int read_index;\r
+       int eof;\r
+} XMLBUF;\r
+\r
+\r
+/* Allocate a new XmlNode */\r
+XmlNode* xml_new(char * name)\r
 {\r
        XmlNode * ret = malloc(sizeof(XmlNode));\r
        if(!ret)return NULL;\r
@@ -19,7 +47,7 @@ XmlNode* xml_new(char * name, char * attrib)
        return ret;\r
 }\r
 \r
-\r
+/* free a previously allocated XmlNode */\r
 void xml_free(XmlNode *target)\r
 {\r
        int i;\r
@@ -34,19 +62,12 @@ void xml_free(XmlNode *target)
        free(target);\r
 }\r
 \r
-#define XML_LETTER     1\r
-#define XML_NUMBER     2\r
-#define XML_SPACE      4\r
-#define XML_SLASH      8\r
-#define XML_OPEN       16\r
-#define XML_EQUALS     32\r
-#define XML_CLOSE      64\r
-#define XML_QUOTE      128\r
-#define XML_OTHER      256\r
-\r
-#define XML_ALL 0xFFFFFFFF\r
-\r
-static int is_special(char item)\r
+/* raise flags if we have a character of special meaning\r
+ *\r
+ * This is where I've hidden the switch statements :-p\r
+ *\r
+ */\r
+int is_special(char item)\r
 {\r
        if((item >= 'a' && item <= 'z') || (item >= 'A' && item <='Z'))\r
                return XML_LETTER;\r
@@ -67,67 +88,53 @@ static int is_special(char item)
        return 128;\r
 }\r
 \r
-struct XMLBUF\r
+/* Refresh the buffer, expects not to be called when EOF */\r
+static void xml_read_file(XMLBUF *xml)\r
 {\r
-       FILE * fptr;\r
-       char * buf;\r
-       int len;\r
-       int eof;\r
-};\r
-\r
-\r
-static void xml_consume(struct XMLBUF *xml, int offset)\r
-{\r
-       int size, request, received;\r
+       int size;\r
        \r
-       size = xml->len - offset;\r
-\r
-       if(!xml->len)\r
-               return;\r
-\r
-       if(size)\r
-       {\r
-//             printf("Size=%d, off=%d, len=%d\n", size, offset, xml->len);\r
-               memmove(xml->buf, xml->buf + offset, size);\r
-       }\r
-\r
-       if(xml->eof)\r
+       size = fread( xml->buf, 1, xml->len, xml->fptr);\r
+       if( size != xml->len )\r
        {\r
                xml->len = size;\r
                xml->buf[size]=0;\r
-               return;\r
+               xml->eof = 1;\r
        }\r
+}\r
 \r
-       request = xml->len - size;\r
-       received = fread(xml->buf + size, 1, request, xml->fptr);\r
-       if( received == request )\r
-               return;\r
 \r
-       xml->len = size + received;\r
-       xml->eof = 1;\r
-       xml->buf[xml->len] = 0;\r
-       return;\r
+/* All reading of the XML buffer done through these two functions */\r
+/*** read a byte without advancing the offset */\r
+static char xml_peek(XMLBUF *xml)\r
+{\r
+       return xml->buf[xml->read_index];\r
 }\r
 \r
-\r
-static void xml_skip( struct XMLBUF *xml, int mask )\r
+/*** read a byte and advance the offset */\r
+static char xml_read_byte(XMLBUF *xml)\r
 {\r
-       int offset = 0;\r
-       if(!xml->len)return;\r
-       while( is_special(xml->buf[offset]) & mask )\r
+       char ret = xml_peek(xml);\r
+       xml->read_index++;\r
+       if(xml->read_index >= xml->len)\r
        {\r
-               offset ++;\r
-               if(offset == xml->len)\r
-               {\r
-                       xml_consume(xml, offset);\r
-                       offset = 0;\r
-                       if(!xml->len)\r
-                               return;\r
-               }\r
+               if(xml->eof)return ret;\r
+               xml->read_index = 0 ;\r
+               xml_read_file(xml);\r
        }\r
-       xml_consume(xml, offset);\r
+       return ret;\r
 }\r
 \r
+\r
+/* skip over bytes matching the is_special mask */\r
+static void xml_skip( XMLBUF *xml, int mask)\r
+{\r
+       printf("just called\n");\r
+       while( is_special(xml_peek(xml)) & mask && xml->len )\r
+               xml_read_byte(xml);\r
+}\r
+\r
+\r
+/* character matching tests for the feed functions */\r
 static char quotechar = 0;\r
 static int test_quote(const char x)\r
 {\r
@@ -148,38 +155,59 @@ static int test_mask(const char x)
        return !(is_special(x) & feed_mask);\r
 }\r
 \r
-static char* xml_feed( struct XMLBUF *xml, int (*test)(char) )\r
+/*\r
+ * char* xml_feed(x, test)\r
+ *\r
+ * Reads as many contiguous chars that pass test() into a newly allocated\r
+ * string.\r
+ *\r
+ * Instead of calling xml_read_byte and flogging realloc() for each byte,\r
+ * it checks the buffer itself.\r
+*/\r
+static char* xml_feed( XMLBUF *xml, int (*test)(char) )\r
 {\r
-       int offset = 0;\r
+       int offset = xml->read_index;\r
+       int delta;\r
        char *ret = NULL;\r
        int size = 0;\r
 \r
+       /* perform first and N middle realloc()'s */\r
        while( test(xml->buf[offset]) )\r
        {\r
-               offset++;\r
-               if(offset == xml->len)\r
+               offset ++;\r
+\r
+               if(offset >= xml->len)\r
                {\r
-                       ret = realloc(ret, size+offset+1);\r
-                       memcpy(ret+size, xml->buf, offset);\r
-                       size += offset;\r
+                       delta = offset - xml->read_index;\r
+                       ret = realloc(ret, size + delta + 1);\r
+                       memcpy(ret+size, xml->buf + xml->read_index, delta);\r
+                       size += delta;\r
                        ret[size]=0;\r
-                       xml_consume(xml, offset);\r
+                       if(xml->eof)return ret;\r
+                       xml_read_file(xml);\r
+                       xml->read_index = 0;\r
                        offset = 0;\r
-                       if(!xml->len)return ret;\r
                }\r
        }\r
-\r
-       if(offset)\r
+       /* perform final realloc() if needed */\r
+       if(offset > xml->read_index)\r
        {\r
-               ret = realloc(ret, size+offset+1);\r
-               memcpy(ret+size, xml->buf, offset);\r
-               size += offset;\r
+               delta = offset - xml->read_index;\r
+               ret = realloc(ret, size + delta + 1);\r
+               memcpy(ret+size, xml->buf + xml->read_index, delta);\r
+               xml->read_index = offset;\r
+               size += delta;\r
                ret[size]=0;\r
-               xml_consume(xml, offset);\r
        }\r
        return ret;\r
 }\r
 \r
+/* this reads attributes from tags, of the form...\r
+ *\r
+ * <tag attr1="some arguments" attr2=argument>\r
+ *\r
+ * It is aware of quotes, and will allow anything inside quoted arguments\r
+ */\r
 static void xml_read_attr(struct XMLBUF *xml, XmlNode *node)\r
 {\r
        int n=0;\r
@@ -187,7 +215,7 @@ static void xml_read_attr(struct XMLBUF *xml, XmlNode *node)
        // how does this tag finish?\r
        while(xml->len)\r
        {\r
-               if( is_special(xml->buf[0]) & (XML_CLOSE | XML_SLASH) )\r
+               if( is_special(xml_peek(xml)) & (XML_CLOSE | XML_SLASH) )\r
                        return;\r
 \r
                n = ++node->nattrib;\r
@@ -196,19 +224,18 @@ static void xml_read_attr(struct XMLBUF *xml, XmlNode *node)
                \r
                feed_mask = XML_EQUALS | XML_SPACE | XML_CLOSE | XML_SLASH;\r
                node->attrib[n*2] = xml_feed(xml, test_mask );\r
-               if( xml->buf[0] == '=' )\r
+               if( xml_peek(xml) == '=' )\r
                {\r
-                       if( is_special(xml->buf[1]) & XML_QUOTE )\r
+                       xml_read_byte(xml);\r
+                       if( is_special(xml_peek(xml)) & XML_QUOTE )\r
                        {\r
-                               quotechar = xml->buf[1];\r
-                               xml_consume(xml, 2);\r
+                               quotechar = xml_read_byte(xml);\r
                                node->attrib[n*2+1] = xml_feed(xml, test_quote);\r
-                               xml_consume(xml, 1);\r
+                               xml_read_byte(xml);\r
                        }\r
                        else\r
                        {\r
                                feed_mask = XML_SPACE | XML_CLOSE | XML_SLASH;\r
-                               xml_consume(xml, 1);\r
                                node->attrib[n*2+1] = xml_feed(xml, test_mask);\r
                        }\r
                }\r
@@ -216,6 +243,11 @@ static void xml_read_attr(struct XMLBUF *xml, XmlNode *node)
        }\r
 }\r
 \r
+/* The big decision maker, is it a regular node, or a text node.\r
+ * If it's a node, it will check if it should have children, and if so\r
+ * will recurse over them.\r
+ * Text nodes don't have children, so no recursing.\r
+ */\r
 static XmlNode* xml_parse(struct XMLBUF *xml)\r
 {\r
        int offset;\r
@@ -227,43 +259,48 @@ static XmlNode* xml_parse(struct XMLBUF *xml)
 \r
        xml_skip(xml, XML_SPACE);       // skip whitespace\r
        offset=0;\r
-       while(xml->len)\r
+       while( (xml->read_index < xml->len) || !xml->eof )\r
        {\r
-               switch(is_special(xml->buf[offset]))\r
+               switch(is_special(xml_peek(xml)))\r
                {\r
                        case XML_OPEN:\r
-                               xml_consume(xml, 1);\r
-                               if(xml->buf[offset] == '/')\r
+                               xml_read_byte(xml);\r
+                               if(xml_peek(xml) == '/')\r
                                        return ret;             // parents close tag\r
                                // read the tag name\r
                                feed_mask = XML_SPACE | XML_SLASH | XML_CLOSE;\r
-                               *this = xml_new( xml_feed(xml, test_mask), NULL );\r
+                               *this = xml_new( xml_feed(xml, test_mask));\r
                                xml_skip(xml, XML_SPACE);       // skip any whitespace\r
 \r
                                xml_read_attr(xml, *this);      // read attributes\r
 \r
                                // how does this tag finish?\r
-                               switch(is_special(xml->buf[0]))\r
+                               switch(is_special(xml_peek(xml)))\r
                                {\r
                                        case XML_CLOSE:         // child-nodes ahead\r
-                                               xml_consume(xml, 1);\r
+                                               xml_read_byte(xml);\r
                                                (*this)->child = xml_parse(xml);\r
                                                xml_skip(xml, XML_ALL ^ XML_CLOSE);\r
-                                               xml_consume(xml, 1);\r
+                                               xml_read_byte(xml);\r
                                                break;\r
                                        case XML_SLASH:         // self closing tag\r
-                                               xml_consume(xml, 2);\r
+                                               xml_read_byte(xml);\r
+                                               xml_read_byte(xml);\r
                                                break;\r
                                }\r
                                break;\r
 \r
                        default:        // text node\r
-                               *this = xml_new(0, 0);\r
+                               *this = xml_new(0);\r
                                xml_skip(xml, XML_SPACE);       // skip any whitespace\r
                                feed_mask = XML_OPEN;\r
                                (*this)->nattrib=1;\r
                                (*this)->attrib = malloc(sizeof(char*)*2);\r
+                               (*this)->attrib[1] = NULL;\r
                                tmp = (*this)->attrib[0] = xml_feed(xml, test_mask);\r
+\r
+                               /* trim the whitespace off the end of text nodes,\r
+                                * by overwriting the spaces will null termination. */\r
                                toff = strlen(tmp)-1;\r
                                while( ( is_special(tmp[toff]) & XML_SPACE ) )\r
                                {\r
@@ -271,7 +308,6 @@ static XmlNode* xml_parse(struct XMLBUF *xml)
                                        toff --;\r
                                }\r
 \r
-                               (*this)->attrib[1] = NULL;\r
                                break;\r
                }\r
                this = &(*this)->next; \r
@@ -282,14 +318,16 @@ static XmlNode* xml_parse(struct XMLBUF *xml)
 }\r
 \r
 \r
-\r
-#define BUF 3264\r
+/* bootstrap the structures for xml_parse() to be able to get started */\r
 XmlNode* xml_load(const char * filename)\r
 {\r
        struct XMLBUF xml;\r
        XmlNode *ret = NULL;\r
 \r
+//     printf("xml_load(\"%s\");\n", filename);\r
+\r
        xml.eof = 0;\r
+       xml.read_index = 0;\r
        xml.fptr = fopen(filename, "rb");\r
        if(!xml.fptr)\r
        {\r
@@ -297,12 +335,13 @@ XmlNode* xml_load(const char * filename)
                return NULL;\r
        }\r
 \r
-       xml.buf = malloc(BUF);\r
+       xml.buf = malloc(BUFFER+1);\r
+       xml.buf[BUFFER]=0;\r
        if(!xml.buf)\r
                goto xml_load_fail_malloc_buf;\r
        \r
-       xml.len = fread(xml.buf, 1, BUF, xml.fptr);\r
-       if(xml.len < BUF)\r
+       xml.len = fread(xml.buf, 1, BUFFER, xml.fptr);\r
+       if(xml.len < BUFFER)\r
                xml.eof = 1;\r
 \r
        ret = xml_parse(&xml);\r
@@ -312,8 +351,8 @@ xml_load_fail_malloc_buf:
        fclose(xml.fptr);\r
        return ret;\r
 }\r
-#undef BUF\r
 \r
+/* very basic function that will get you the first node with a given name */\r
 XmlNode * xml_find(XmlNode *xml, const char *name)\r
 {\r
        XmlNode * ret;\r
@@ -331,7 +370,7 @@ XmlNode * xml_find(XmlNode *xml, const char *name)
        return NULL;\r
 }\r
 \r
-\r
+/* very basic attribute lookup function */\r
 char* xml_attr(XmlNode *x, const char *name)\r
 {\r
        int i;\r
@@ -344,6 +383,7 @@ char* xml_attr(XmlNode *x, const char *name)
 \r
 \r
 #ifdef TEST\r
+/* print out the heirarchy of an XML file, useful for debugging */\r
 void xp(XmlNode *x, int level, int max)\r
 {\r
        int i;\r
@@ -357,6 +397,7 @@ void xp(XmlNode *x, int level, int max)
        if(x->name)\r
        for(i=0; i<x->nattrib; i++)\r
                printf("%s=\"%s\",", x->attrib[i*2], x->attrib[i*2+1]);\r
+       else printf("%s", x->attrib[0]);\r
        printf("\n");\r
        if(x->child)xp(x->child, level+1, max);\r
        if(x->next)xp(x->next, level, max);\r
@@ -365,7 +406,8 @@ void xp(XmlNode *x, int level, int max)
 \r
 int main(int argc, char *argv[])\r
 {\r
-       XmlNode *x;\r
+       XmlNode *x, *tmp;\r
+       int i;\r
 \r
        if(!argv[1])\r
        {\r
@@ -374,19 +416,29 @@ int main(int argc, char *argv[])
                return 1;\r
        }\r
 \r
-       printf("Loading file \"%s\"\n", argv[1]);\r
-\r
-       x = xml_load(argv[1]);\r
-\r
-       if(!x)\r
+#ifdef PROFILE\r
+       for(i=0; i<1000; i++)\r
        {\r
-               printf("Failed to load.\n");\r
-               return 2;\r
+#endif\r
+               x = xml_load(argv[1]);\r
+\r
+               if(!x)\r
+               {\r
+                       printf("Failed to load.\n");\r
+                       return 2;\r
+               }\r
+#ifndef PROFILE\r
+               xp(x, 1, 20);\r
+#endif\r
+               xml_free(x);\r
+#ifdef PROFILE\r
        }\r
+#endif\r
 \r
-       xp(x, 1, 6);\r
-       xml_free(x);\r
-       printf("Happily free.\n");\r
+       \r
+//     tmp = xml_find(x, "geometry");\r
+//     xp(x, 1, 6);\r
+//     printf("Happily free.\n");\r
        return 0;\r
 }\r
 #endif\r
index e201a36cdc1cdf39c9edc09f4a7e12e00105be39..4b5017771ca8b3829cc595b9c8ca0305c9373a8e 100644 (file)
@@ -1,4 +1,4 @@
-/* Licensed under GPL - see LICENSE file for details */
+
 typedef struct XmlNode {
        char * name;
        char ** attrib;
@@ -8,7 +8,7 @@ typedef struct XmlNode {
 } XmlNode;
 
 
-XmlNode* xml_new(char * name, char * attrib);
+XmlNode* xml_new(char * name);
 XmlNode* xml_load(const char * filename);
 void xml_free(XmlNode *target);
 char* xml_attr(XmlNode *x, const char *name);