Added module stringmap
authorJoey Adams <joeyadams3.14159@gmail.com>
Wed, 15 Jul 2009 05:19:52 +0000 (01:19 -0400)
committerJoey Adams <joeyadams3.14159@gmail.com>
Wed, 15 Jul 2009 05:19:52 +0000 (01:19 -0400)
ccan/stringmap/_info [new file with mode: 0644]
ccan/stringmap/stringmap.c [new file with mode: 0644]
ccan/stringmap/stringmap.h [new file with mode: 0644]
ccan/stringmap/test/run.c [new file with mode: 0644]

diff --git a/ccan/stringmap/_info b/ccan/stringmap/_info
new file mode 100644 (file)
index 0000000..38f3067
--- /dev/null
@@ -0,0 +1,66 @@
+#include <stdio.h>
+#include <string.h>
+#include "config.h"
+
+/**
+ * stringmap - Macros for mapping strings to things
+ *
+ * stringmap provides a generic string map via macros.
+ *
+ * Features which are sorely lacking in this version of stringmap are deletion and traversal.
+ *
+ * Example:
+ *
+ * #include <ccan/stringmap/stringmap.h>                                                                                                                                        
+ *
+ * static const char *get_string(void) {                                                                                                                                        
+ *      static char buffer[4096];                                                                                                                                               
+ *      char *tail;                                                                                                                                                             
+ *      if (!fgets(buffer, sizeof(buffer), stdin))                                                                                                                              
+ *              return NULL;                                                                                                                                                    
+ *      tail = strchr(buffer, 0);                                                                                                                                               
+ *      if (tail>buffer && tail[-1]=='\n')                                                                                                                                      
+ *              *--tail = 0;                                                                                                                                                    
+ *      if (!*buffer)                                                                                                                                                           
+ *              return NULL;                                                                                                                                                    
+ *      return buffer;                                                                                                                                                          
+ * }                                                                                                                                                                            
+ *
+ * int main(void) {                                                                                                                                                             
+ *      stringmap(int) map = stringmap_new(NULL);
+ *      const char *string;
+ *
+ *      while ((string = get_string()) != NULL) {
+ *              int *count = stringmap_lookup(map, string);
+ *
+ *              if (!count) {
+ *                      printf("\"%s\" is new\n", string);
+ *                      count = stringmap_enter(map, string);
+ *              }
+ *
+ *              (*count) ++;
+ *
+ *              printf("\"%s\" has been entered %d times\n", string, *count);
+ *      }
+ *
+ *      stringmap_free(map);
+ *
+ *    return 0;
+ * }
+ *
+ *     Authors: Joey Adams, Anders Magnusson
+ *     License: BSD
+ */
+int main(int argc, char *argv[])
+{
+       /* Expect exactly one argument */
+       if (argc != 2)
+               return 1;
+
+       if (strcmp(argv[1], "depends") == 0) {
+               printf("ccan/block_pool\n");
+               return 0;
+       }
+
+       return 1;
+}
diff --git a/ccan/stringmap/stringmap.c b/ccan/stringmap/stringmap.c
new file mode 100644 (file)
index 0000000..059dbaa
--- /dev/null
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2004 Anders Magnusson (ragge@ludd.luth.se).
+ * Copyright (c) 2009 Joseph Adams (joeyadams3.14159@gmail.com).
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* This is a heavily modified version of the Patricia tree implementation
+   in PCC at http://pcc.zentus.com/cgi-bin/cvsweb.cgi/cc/cpp/cpp.c?rev=1.96 */
+
+#include "stringmap.h"
+
+#if 0
+#include <assert.h>
+#else
+#define assert(...) do {} while(0)
+#endif
+
+#define BITNO(x)               ((x) & ~(LEFT_IS_LEAF|RIGHT_IS_LEAF))
+#define LEFT_IS_LEAF           0x80000000
+#define RIGHT_IS_LEAF          0x40000000
+#define IS_LEFT_LEAF(x)                (((x) & LEFT_IS_LEAF) != 0)
+#define IS_RIGHT_LEAF(x)       (((x) & RIGHT_IS_LEAF) != 0)
+#define P_BIT(key, bit)                (key[bit >> 3] >> (bit & 7)) & 1
+#define CHECKBITS              8
+
+struct T {
+       char *str;
+};
+
+static void *T_new(struct block_pool *bp, const char *key, size_t T_size) {
+       struct T *leaf = block_pool_alloc(bp, T_size);
+       memset(leaf, 0, T_size);
+       leaf->str = block_pool_strdup(bp, key);
+       return leaf;
+}
+
+void *stringmap_lookup_real(struct stringmap *t, const char *key, int enterf, const size_t T_size) {
+       struct T *sp;
+       struct stringmap_node *w, *new, *last;
+       int len, cix, bit, fbit, svbit, ix, bitno;
+       const char *k, *m, *sm;
+       
+       if (!t->root) {
+               if (!enterf)
+                       return NULL;
+               
+               t->bp = block_pool_new(t->bp);
+               
+               t->root = T_new(t->bp, key, T_size);
+               t->count = 1;
+               
+               return t->root;
+       }
+
+       /* Count full string length */
+       for (k = key, len = 0; *k; k++, len++)
+               ;
+       
+       if (t->count == 1) {
+               w = t->root;
+               svbit = 0;
+       } else {
+               w = t->root;
+               bitno = len * CHECKBITS;
+               for (;;) {
+                       bit = BITNO(w->bitno);
+                       fbit = bit > bitno ? 0 : P_BIT(key, bit);
+                       svbit = fbit ? IS_RIGHT_LEAF(w->bitno) :
+                           IS_LEFT_LEAF(w->bitno);
+                       w = w->lr[fbit];
+                       if (svbit)
+                               break;
+               }
+       }
+
+       sp = (struct T *)w;
+
+       sm = m = sp->str;
+       k = key;
+
+       /* Check for correct string and return */
+       for (cix = 0; *m && *k && *m == *k; m++, k++, cix += CHECKBITS)
+               ;
+       if (*m == 0 && *k == 0) {
+               //if (!enterf && sp->value == NULL)
+               //      return NULL;
+               return sp;
+       }
+
+       if (!enterf)
+               return NULL; /* no string found and do not enter */
+
+       ix = *m ^ *k;
+       while ((ix & 1) == 0)
+               ix >>= 1, cix++;
+
+       /* Create new node */
+       new = block_pool_alloc(t->bp, sizeof *new);
+       bit = P_BIT(key, cix);
+       new->bitno = cix | (bit ? RIGHT_IS_LEAF : LEFT_IS_LEAF);
+       new->lr[bit] = T_new(t->bp, key, T_size);
+
+       if (t->count++ == 1) {
+               new->lr[!bit] = t->root;
+               new->bitno |= (bit ? LEFT_IS_LEAF : RIGHT_IS_LEAF);
+               t->root = new;
+               return (struct T *)new->lr[bit];
+       }
+
+       w = t->root;
+       last = NULL;
+       for (;;) {
+               fbit = w->bitno;
+               bitno = BITNO(w->bitno);
+               assert(bitno != cix);
+               if (bitno > cix)
+                       break;
+               svbit = P_BIT(key, bitno);
+               last = w;
+               w = w->lr[svbit];
+               if (fbit & (svbit ? RIGHT_IS_LEAF : LEFT_IS_LEAF))
+                       break;
+       }
+
+       new->lr[!bit] = w;
+       if (last == NULL) {
+               t->root = new;
+       } else {
+               last->lr[svbit] = new;
+               last->bitno &= ~(svbit ? RIGHT_IS_LEAF : LEFT_IS_LEAF);
+       }
+       if (bitno < cix)
+               new->bitno |= (bit ? LEFT_IS_LEAF : RIGHT_IS_LEAF);
+       return (struct T *)new->lr[bit];
+}
diff --git a/ccan/stringmap/stringmap.h b/ccan/stringmap/stringmap.h
new file mode 100644 (file)
index 0000000..df9fe63
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2004 Anders Magnusson (ragge@ludd.luth.se).
+ * Copyright (c) 2009 Joseph Adams (joeyadams3.14159@gmail.com).
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CCAN_STRINGMAP_H
+#define CCAN_STRINGMAP_H
+
+#include <ccan/block_pool/block_pool.h>
+#include <stdint.h>
+
+#define stringmap(theType) struct {struct stringmap t; struct {char *str; theType value;} *last;}
+       //the 'last' pointer here is used as a hacky typeof() alternative
+
+#define stringmap_new(ctx) {{0,0,(struct block_pool*)(ctx)},0}
+#define stringmap_init(sm, ctx) do { \
+               (sm).t.root = 0; \
+               (sm).t.count = 0; \
+               (sm).t.bp = (struct block_pool*)(ctx); \
+               (sm).last = 0; \
+       } while(0)
+#define stringmap_free(sm) do { \
+               if ((sm).t.root) \
+                       block_pool_free((sm).t.bp); \
+       } while(0)
+
+#define stringmap_lookup(sm, key) stringmap_le(sm, key, 0)
+#define stringmap_enter(sm, key) stringmap_le(sm, key, 1)
+
+//this macro sets sm.last so it can exploit its type
+#define stringmap_le(sm, key, enterf) ((((sm).last) = stringmap_lookup_real(&(sm).t, key, enterf, sizeof(*(sm).last))) ? &(sm).last->value : NULL)
+
+
+struct stringmap_node {
+       uint32_t bitno;
+       struct stringmap_node *lr[2];
+};
+
+struct stringmap {
+       struct stringmap_node *root;
+       size_t count;
+       struct block_pool *bp;
+       //hack: 'bp' holds talloc ctx when 'root' is NULL
+};
+
+void *stringmap_lookup_real(struct stringmap *t, const char *key, int enterf, size_t T_size);
+
+#endif
diff --git a/ccan/stringmap/test/run.c b/ccan/stringmap/test/run.c
new file mode 100644 (file)
index 0000000..37e98e6
--- /dev/null
@@ -0,0 +1,203 @@
+#include "stringmap/stringmap.h"
+#include "stringmap/stringmap.c"
+
+#include "tap/tap.h"
+
+static void test_trivial(void) {
+       stringmap(int) map = stringmap_new(NULL);
+       
+       ok1(stringmap_lookup(map, "one") == NULL);
+       *stringmap_enter(map, "one") = 1;
+       
+       ok1(stringmap_lookup(map, "two") == NULL);
+       *stringmap_enter(map, "two") = 2;
+       
+       ok1(stringmap_lookup(map, "three") == NULL);
+       *stringmap_enter(map, "three") = 3;
+       
+       ok1(stringmap_lookup(map, "four") == NULL);
+       *stringmap_enter(map, "four") = 4;
+       
+       ok1(*stringmap_lookup(map, "three") == 3);
+       ok1(*stringmap_lookup(map, "one") == 1);
+       ok1(*stringmap_lookup(map, "four") == 4);
+       ok1(*stringmap_lookup(map, "two") == 2);
+       
+       ok1(map.t.count == 4);
+       
+       stringmap_free(map);
+}
+
+
+static void scramble(void *base, size_t nmemb, size_t size) {
+   char *i = base;
+   char *o;
+   size_t sd;
+   for (;nmemb>1;nmemb--) {
+      o = i + size*(random()%nmemb);
+      for (sd=size;sd--;) {
+         char tmp = *o;
+         *o++ = *i;
+         *i++ = tmp;
+      }
+   }
+}
+
+//#define RANDOM_STRING_READABLE
+
+static char *random_string(struct block_pool *bp) {
+       size_t len = random() % 100;
+       char *str = block_pool_alloc(bp, len+1);
+       char *i;
+       
+       for (i=str; len--; i++) {
+               #ifndef RANDOM_STRING_READABLE
+               char c = random();
+               *i = c ? c : ' ';
+               #else
+               //only generate characters [32,126]
+               char c = random()%95 + 32;
+               *i = c;
+               #endif
+       }
+       *i = 0;
+       
+       return str;
+}
+
+struct test_entry {
+       const char *str;
+       char *value;
+};
+
+static int by_str(const void *ap, const void *bp) {
+       return strcmp(((struct test_entry*)ap)->str, ((struct test_entry*)bp)->str);
+}
+
+static void cull_duplicates(struct test_entry *entries, size_t *count) {
+       struct test_entry *i, *o, *e = entries + *count;
+       
+       qsort(entries, *count, sizeof(*entries), by_str);
+       
+       for (i=entries, o=entries; i<e;) {
+               //skip repeated strings
+               if (i>entries) {
+                       const char *last = i[-1].str;
+                       if (!strcmp(last, i->str)) {
+                               do i++; while(i<e && !strcmp(last, i->str));
+                               continue;
+                       }
+               }
+               
+               //write all entries with the same value (should also have same string)
+               {
+                       char *value = i->value;
+                       do *o++ = *i++; while(i<e && i->value == value);
+               }
+       }
+       
+       *count = o-entries;
+}
+
+static int test_stringmap(size_t count, FILE *out) {
+       stringmap(char*) map = stringmap_new(NULL);
+       
+       #define print(tag, fmt, ...) do { \
+                       if (out) \
+                               fprintf(out, tag fmt "\n", ##__VA_ARGS__); \
+               } while(0)
+       #define err(...) do { \
+                       print("error: ", __VA_ARGS__); \
+                       goto fail; \
+               } while(0)
+       #define debug(...) print("debug: ", __VA_ARGS__)
+       #define msg(...) print("info: ", __VA_ARGS__)
+       
+       struct block_pool *bp = block_pool_new(NULL);
+       struct test_entry *entries = block_pool_alloc(bp, sizeof(*entries) * count);
+       struct test_entry *i, *e = entries+count;
+       char *value_base = block_pool_alloc(bp, count), *value = value_base;
+       size_t unique_count = 0;
+       
+       //we use value to track whether an entry has been added or not
+       memset(value, 0, count);
+       
+       msg("Generating %zu test entries...", count);
+       
+       for (i=entries; i<e; value++) {
+               char *str = random_string(bp);
+               size_t same_count = (random()%5 ? random()%3 : random()%10) + 1;
+               
+               for (;same_count-- && i<e; i++) {
+                       i->str = str;
+                       i->value = value;
+               }
+       }
+       
+       cull_duplicates(entries, &count);
+       e = entries+count;
+       scramble(entries, count, sizeof(*entries));
+       
+       msg("Inserting/looking up %zu entries...", count);
+       
+       for (i=entries; i<e; i++) {
+               char **node;
+               
+               debug("Looking up %s", i->str);
+               
+               node = stringmap_lookup(map, i->str);
+               
+               if (!node) {
+                       if (*i->value)
+                               err("Previously inserted entry not found");
+                       
+                       debug("Not found; entering");
+                       
+                       node = stringmap_enter(map, i->str);
+                       if (!node || strcmp(i->str, map.last->str))
+                               err("Node not properly entered");
+                       *node = i->value;
+                       *i->value = 1; //mark that the entry is entered
+                       
+                       unique_count++;
+               } else {
+                       if (strcmp(i->value, map.last->value))
+                               err("lookup returned incorrect string");
+                       if (i->value != *node)
+                               err("lookup returned incorrect value");
+                       if (!**node)
+                               err("lookup returned bogus value");
+               }
+       }
+       
+       if (map.t.count != unique_count)
+               err("Map has incorrect count");
+       
+       printf("stringmap test passed after %zu inserts, %zu lookups (%zu total operations)\n", unique_count, (i-entries)-unique_count, i-entries);
+       
+       block_pool_free(bp);
+       stringmap_free(map);
+       return 1;
+
+fail:
+       printf("stringmap test failed after %zu inserts, %zu lookups (%zu total operations)\n", unique_count, (i-entries)-unique_count, i-entries);
+       
+       block_pool_free(bp);
+       stringmap_free(map);
+       return 0;
+       
+       #undef print
+       #undef err
+       #undef debug
+       #undef msg
+}
+
+int main(void)
+{
+       plan_tests(10);
+       
+       test_trivial();
+       ok1(test_stringmap(10000, NULL));
+       
+       return exit_status();
+}