From: Daniel Burke Date: Mon, 5 Sep 2011 01:10:52 +0000 (+0930) Subject: ttxml: Rplaced ratchet buffer with regular buffer, 20x speed increase X-Git-Url: https://git.ozlabs.org/?p=ccan;a=commitdiff_plain;h=385e0e538b106d2cbd3d9c2172150816a8b39b5d ttxml: Rplaced ratchet buffer with regular buffer, 20x speed increase --- diff --git a/ccan/ttxml/ttxml.c b/ccan/ttxml/ttxml.c index 80c34841..791ced7c 100644 --- a/ccan/ttxml/ttxml.c +++ b/ccan/ttxml/ttxml.c @@ -1,4 +1,5 @@ /* Licensed under GPL - see LICENSE file for details */ + #include #include #include @@ -6,7 +7,34 @@ #include "ttxml.h" -XmlNode* xml_new(char * name, char * attrib) +#define BUFFER 3264 + + +#define XML_LETTER 1 +#define XML_NUMBER 2 +#define XML_SPACE 4 +#define XML_SLASH 8 +#define XML_OPEN 16 +#define XML_EQUALS 32 +#define XML_CLOSE 64 +#define XML_QUOTE 128 +#define XML_OTHER 256 + +#define XML_ALL 0xFFFFFFFF + + +typedef struct XMLBUF +{ + FILE * fptr; + char * buf; + int len; + int read_index; + int eof; +} XMLBUF; + + +/* Allocate a new XmlNode */ +XmlNode* xml_new(char * name) { XmlNode * ret = malloc(sizeof(XmlNode)); if(!ret)return NULL; @@ -19,7 +47,7 @@ XmlNode* xml_new(char * name, char * attrib) return ret; } - +/* free a previously allocated XmlNode */ void xml_free(XmlNode *target) { int i; @@ -34,19 +62,12 @@ void xml_free(XmlNode *target) free(target); } -#define XML_LETTER 1 -#define XML_NUMBER 2 -#define XML_SPACE 4 -#define XML_SLASH 8 -#define XML_OPEN 16 -#define XML_EQUALS 32 -#define XML_CLOSE 64 -#define XML_QUOTE 128 -#define XML_OTHER 256 - -#define XML_ALL 0xFFFFFFFF - -static int is_special(char item) +/* raise flags if we have a character of special meaning + * + * This is where I've hidden the switch statements :-p + * + */ +int is_special(char item) { if((item >= 'a' && item <= 'z') || (item >= 'A' && item <='Z')) return XML_LETTER; @@ -67,67 +88,53 @@ static int is_special(char item) return 128; } -struct XMLBUF +/* Refresh the buffer, expects not to be called when EOF */ +static void xml_read_file(XMLBUF *xml) { - FILE * fptr; - char * buf; - int len; - int eof; -}; - - -static void xml_consume(struct XMLBUF *xml, int offset) -{ - int size, request, received; + int size; - size = xml->len - offset; - - if(!xml->len) - return; - - if(size) - { -// printf("Size=%d, off=%d, len=%d\n", size, offset, xml->len); - memmove(xml->buf, xml->buf + offset, size); - } - - if(xml->eof) + size = fread( xml->buf, 1, xml->len, xml->fptr); + if( size != xml->len ) { xml->len = size; xml->buf[size]=0; - return; + xml->eof = 1; } +} - request = xml->len - size; - received = fread(xml->buf + size, 1, request, xml->fptr); - if( received == request ) - return; - xml->len = size + received; - xml->eof = 1; - xml->buf[xml->len] = 0; - return; +/* All reading of the XML buffer done through these two functions */ +/*** read a byte without advancing the offset */ +static char xml_peek(XMLBUF *xml) +{ + return xml->buf[xml->read_index]; } - -static void xml_skip( struct XMLBUF *xml, int mask ) +/*** read a byte and advance the offset */ +static char xml_read_byte(XMLBUF *xml) { - int offset = 0; - if(!xml->len)return; - while( is_special(xml->buf[offset]) & mask ) + char ret = xml_peek(xml); + xml->read_index++; + if(xml->read_index >= xml->len) { - offset ++; - if(offset == xml->len) - { - xml_consume(xml, offset); - offset = 0; - if(!xml->len) - return; - } + if(xml->eof)return ret; + xml->read_index = 0 ; + xml_read_file(xml); } - xml_consume(xml, offset); + return ret; } + +/* skip over bytes matching the is_special mask */ +static void xml_skip( XMLBUF *xml, int mask) +{ + printf("just called\n"); + while( is_special(xml_peek(xml)) & mask && xml->len ) + xml_read_byte(xml); +} + + +/* character matching tests for the feed functions */ static char quotechar = 0; static int test_quote(const char x) { @@ -148,38 +155,59 @@ static int test_mask(const char x) return !(is_special(x) & feed_mask); } -static char* xml_feed( struct XMLBUF *xml, int (*test)(char) ) +/* + * char* xml_feed(x, test) + * + * Reads as many contiguous chars that pass test() into a newly allocated + * string. + * + * Instead of calling xml_read_byte and flogging realloc() for each byte, + * it checks the buffer itself. +*/ +static char* xml_feed( XMLBUF *xml, int (*test)(char) ) { - int offset = 0; + int offset = xml->read_index; + int delta; char *ret = NULL; int size = 0; + /* perform first and N middle realloc()'s */ while( test(xml->buf[offset]) ) { - offset++; - if(offset == xml->len) + offset ++; + + if(offset >= xml->len) { - ret = realloc(ret, size+offset+1); - memcpy(ret+size, xml->buf, offset); - size += offset; + delta = offset - xml->read_index; + ret = realloc(ret, size + delta + 1); + memcpy(ret+size, xml->buf + xml->read_index, delta); + size += delta; ret[size]=0; - xml_consume(xml, offset); + if(xml->eof)return ret; + xml_read_file(xml); + xml->read_index = 0; offset = 0; - if(!xml->len)return ret; } } - - if(offset) + /* perform final realloc() if needed */ + if(offset > xml->read_index) { - ret = realloc(ret, size+offset+1); - memcpy(ret+size, xml->buf, offset); - size += offset; + delta = offset - xml->read_index; + ret = realloc(ret, size + delta + 1); + memcpy(ret+size, xml->buf + xml->read_index, delta); + xml->read_index = offset; + size += delta; ret[size]=0; - xml_consume(xml, offset); } return ret; } +/* this reads attributes from tags, of the form... + * + * + * + * It is aware of quotes, and will allow anything inside quoted arguments + */ static void xml_read_attr(struct XMLBUF *xml, XmlNode *node) { int n=0; @@ -187,7 +215,7 @@ static void xml_read_attr(struct XMLBUF *xml, XmlNode *node) // how does this tag finish? while(xml->len) { - if( is_special(xml->buf[0]) & (XML_CLOSE | XML_SLASH) ) + if( is_special(xml_peek(xml)) & (XML_CLOSE | XML_SLASH) ) return; n = ++node->nattrib; @@ -196,19 +224,18 @@ static void xml_read_attr(struct XMLBUF *xml, XmlNode *node) feed_mask = XML_EQUALS | XML_SPACE | XML_CLOSE | XML_SLASH; node->attrib[n*2] = xml_feed(xml, test_mask ); - if( xml->buf[0] == '=' ) + if( xml_peek(xml) == '=' ) { - if( is_special(xml->buf[1]) & XML_QUOTE ) + xml_read_byte(xml); + if( is_special(xml_peek(xml)) & XML_QUOTE ) { - quotechar = xml->buf[1]; - xml_consume(xml, 2); + quotechar = xml_read_byte(xml); node->attrib[n*2+1] = xml_feed(xml, test_quote); - xml_consume(xml, 1); + xml_read_byte(xml); } else { feed_mask = XML_SPACE | XML_CLOSE | XML_SLASH; - xml_consume(xml, 1); node->attrib[n*2+1] = xml_feed(xml, test_mask); } } @@ -216,6 +243,11 @@ static void xml_read_attr(struct XMLBUF *xml, XmlNode *node) } } +/* The big decision maker, is it a regular node, or a text node. + * If it's a node, it will check if it should have children, and if so + * will recurse over them. + * Text nodes don't have children, so no recursing. + */ static XmlNode* xml_parse(struct XMLBUF *xml) { int offset; @@ -227,43 +259,48 @@ static XmlNode* xml_parse(struct XMLBUF *xml) xml_skip(xml, XML_SPACE); // skip whitespace offset=0; - while(xml->len) + while( (xml->read_index < xml->len) || !xml->eof ) { - switch(is_special(xml->buf[offset])) + switch(is_special(xml_peek(xml))) { case XML_OPEN: - xml_consume(xml, 1); - if(xml->buf[offset] == '/') + xml_read_byte(xml); + if(xml_peek(xml) == '/') return ret; // parents close tag // read the tag name feed_mask = XML_SPACE | XML_SLASH | XML_CLOSE; - *this = xml_new( xml_feed(xml, test_mask), NULL ); + *this = xml_new( xml_feed(xml, test_mask)); xml_skip(xml, XML_SPACE); // skip any whitespace xml_read_attr(xml, *this); // read attributes // how does this tag finish? - switch(is_special(xml->buf[0])) + switch(is_special(xml_peek(xml))) { case XML_CLOSE: // child-nodes ahead - xml_consume(xml, 1); + xml_read_byte(xml); (*this)->child = xml_parse(xml); xml_skip(xml, XML_ALL ^ XML_CLOSE); - xml_consume(xml, 1); + xml_read_byte(xml); break; case XML_SLASH: // self closing tag - xml_consume(xml, 2); + xml_read_byte(xml); + xml_read_byte(xml); break; } break; default: // text node - *this = xml_new(0, 0); + *this = xml_new(0); xml_skip(xml, XML_SPACE); // skip any whitespace feed_mask = XML_OPEN; (*this)->nattrib=1; (*this)->attrib = malloc(sizeof(char*)*2); + (*this)->attrib[1] = NULL; tmp = (*this)->attrib[0] = xml_feed(xml, test_mask); + + /* trim the whitespace off the end of text nodes, + * by overwriting the spaces will null termination. */ toff = strlen(tmp)-1; while( ( is_special(tmp[toff]) & XML_SPACE ) ) { @@ -271,7 +308,6 @@ static XmlNode* xml_parse(struct XMLBUF *xml) toff --; } - (*this)->attrib[1] = NULL; break; } this = &(*this)->next; @@ -282,14 +318,16 @@ static XmlNode* xml_parse(struct XMLBUF *xml) } - -#define BUF 3264 +/* bootstrap the structures for xml_parse() to be able to get started */ XmlNode* xml_load(const char * filename) { struct XMLBUF xml; XmlNode *ret = NULL; +// printf("xml_load(\"%s\");\n", filename); + xml.eof = 0; + xml.read_index = 0; xml.fptr = fopen(filename, "rb"); if(!xml.fptr) { @@ -297,12 +335,13 @@ XmlNode* xml_load(const char * filename) return NULL; } - xml.buf = malloc(BUF); + xml.buf = malloc(BUFFER+1); + xml.buf[BUFFER]=0; if(!xml.buf) goto xml_load_fail_malloc_buf; - xml.len = fread(xml.buf, 1, BUF, xml.fptr); - if(xml.len < BUF) + xml.len = fread(xml.buf, 1, BUFFER, xml.fptr); + if(xml.len < BUFFER) xml.eof = 1; ret = xml_parse(&xml); @@ -312,8 +351,8 @@ xml_load_fail_malloc_buf: fclose(xml.fptr); return ret; } -#undef BUF +/* very basic function that will get you the first node with a given name */ XmlNode * xml_find(XmlNode *xml, const char *name) { XmlNode * ret; @@ -331,7 +370,7 @@ XmlNode * xml_find(XmlNode *xml, const char *name) return NULL; } - +/* very basic attribute lookup function */ char* xml_attr(XmlNode *x, const char *name) { int i; @@ -344,6 +383,7 @@ char* xml_attr(XmlNode *x, const char *name) #ifdef TEST +/* print out the heirarchy of an XML file, useful for debugging */ void xp(XmlNode *x, int level, int max) { int i; @@ -357,6 +397,7 @@ void xp(XmlNode *x, int level, int max) if(x->name) for(i=0; inattrib; i++) printf("%s=\"%s\",", x->attrib[i*2], x->attrib[i*2+1]); + else printf("%s", x->attrib[0]); printf("\n"); if(x->child)xp(x->child, level+1, max); if(x->next)xp(x->next, level, max); @@ -365,7 +406,8 @@ void xp(XmlNode *x, int level, int max) int main(int argc, char *argv[]) { - XmlNode *x; + XmlNode *x, *tmp; + int i; if(!argv[1]) { @@ -374,19 +416,29 @@ int main(int argc, char *argv[]) return 1; } - printf("Loading file \"%s\"\n", argv[1]); - - x = xml_load(argv[1]); - - if(!x) +#ifdef PROFILE + for(i=0; i<1000; i++) { - printf("Failed to load.\n"); - return 2; +#endif + x = xml_load(argv[1]); + + if(!x) + { + printf("Failed to load.\n"); + return 2; + } +#ifndef PROFILE + xp(x, 1, 20); +#endif + xml_free(x); +#ifdef PROFILE } +#endif - xp(x, 1, 6); - xml_free(x); - printf("Happily free.\n"); + +// tmp = xml_find(x, "geometry"); +// xp(x, 1, 6); +// printf("Happily free.\n"); return 0; } #endif diff --git a/ccan/ttxml/ttxml.h b/ccan/ttxml/ttxml.h index e201a36c..4b501777 100644 --- a/ccan/ttxml/ttxml.h +++ b/ccan/ttxml/ttxml.h @@ -1,4 +1,4 @@ -/* Licensed under GPL - see LICENSE file for details */ + typedef struct XmlNode { char * name; char ** attrib; @@ -8,7 +8,7 @@ typedef struct XmlNode { } XmlNode; -XmlNode* xml_new(char * name, char * attrib); +XmlNode* xml_new(char * name); XmlNode* xml_load(const char * filename); void xml_free(XmlNode *target); char* xml_attr(XmlNode *x, const char *name);