From 083a691fe382eeff1d72754c6d33021192cacc30 Mon Sep 17 00:00:00 2001 From: Joey Adams Date: Wed, 15 Jul 2009 01:19:52 -0400 Subject: [PATCH] Added module stringmap --- ccan/stringmap/_info | 66 ++++++++++++ ccan/stringmap/stringmap.c | 157 ++++++++++++++++++++++++++++ ccan/stringmap/stringmap.h | 71 +++++++++++++ ccan/stringmap/test/run.c | 203 +++++++++++++++++++++++++++++++++++++ 4 files changed, 497 insertions(+) create mode 100644 ccan/stringmap/_info create mode 100644 ccan/stringmap/stringmap.c create mode 100644 ccan/stringmap/stringmap.h create mode 100644 ccan/stringmap/test/run.c diff --git a/ccan/stringmap/_info b/ccan/stringmap/_info new file mode 100644 index 00000000..38f30678 --- /dev/null +++ b/ccan/stringmap/_info @@ -0,0 +1,66 @@ +#include +#include +#include "config.h" + +/** + * stringmap - Macros for mapping strings to things + * + * stringmap provides a generic string map via macros. + * + * Features which are sorely lacking in this version of stringmap are deletion and traversal. + * + * Example: + * + * #include + * + * static const char *get_string(void) { + * static char buffer[4096]; + * char *tail; + * if (!fgets(buffer, sizeof(buffer), stdin)) + * return NULL; + * tail = strchr(buffer, 0); + * if (tail>buffer && tail[-1]=='\n') + * *--tail = 0; + * if (!*buffer) + * return NULL; + * return buffer; + * } + * + * int main(void) { + * stringmap(int) map = stringmap_new(NULL); + * const char *string; + * + * while ((string = get_string()) != NULL) { + * int *count = stringmap_lookup(map, string); + * + * if (!count) { + * printf("\"%s\" is new\n", string); + * count = stringmap_enter(map, string); + * } + * + * (*count) ++; + * + * printf("\"%s\" has been entered %d times\n", string, *count); + * } + * + * stringmap_free(map); + * + * return 0; + * } + * + * Authors: Joey Adams, Anders Magnusson + * License: BSD + */ +int main(int argc, char *argv[]) +{ + /* Expect exactly one argument */ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) { + printf("ccan/block_pool\n"); + return 0; + } + + return 1; +} diff --git a/ccan/stringmap/stringmap.c b/ccan/stringmap/stringmap.c new file mode 100644 index 00000000..059dbaaa --- /dev/null +++ b/ccan/stringmap/stringmap.c @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2004 Anders Magnusson (ragge@ludd.luth.se). + * Copyright (c) 2009 Joseph Adams (joeyadams3.14159@gmail.com). + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* This is a heavily modified version of the Patricia tree implementation + in PCC at http://pcc.zentus.com/cgi-bin/cvsweb.cgi/cc/cpp/cpp.c?rev=1.96 */ + +#include "stringmap.h" + +#if 0 +#include +#else +#define assert(...) do {} while(0) +#endif + +#define BITNO(x) ((x) & ~(LEFT_IS_LEAF|RIGHT_IS_LEAF)) +#define LEFT_IS_LEAF 0x80000000 +#define RIGHT_IS_LEAF 0x40000000 +#define IS_LEFT_LEAF(x) (((x) & LEFT_IS_LEAF) != 0) +#define IS_RIGHT_LEAF(x) (((x) & RIGHT_IS_LEAF) != 0) +#define P_BIT(key, bit) (key[bit >> 3] >> (bit & 7)) & 1 +#define CHECKBITS 8 + +struct T { + char *str; +}; + +static void *T_new(struct block_pool *bp, const char *key, size_t T_size) { + struct T *leaf = block_pool_alloc(bp, T_size); + memset(leaf, 0, T_size); + leaf->str = block_pool_strdup(bp, key); + return leaf; +} + +void *stringmap_lookup_real(struct stringmap *t, const char *key, int enterf, const size_t T_size) { + struct T *sp; + struct stringmap_node *w, *new, *last; + int len, cix, bit, fbit, svbit, ix, bitno; + const char *k, *m, *sm; + + if (!t->root) { + if (!enterf) + return NULL; + + t->bp = block_pool_new(t->bp); + + t->root = T_new(t->bp, key, T_size); + t->count = 1; + + return t->root; + } + + /* Count full string length */ + for (k = key, len = 0; *k; k++, len++) + ; + + if (t->count == 1) { + w = t->root; + svbit = 0; + } else { + w = t->root; + bitno = len * CHECKBITS; + for (;;) { + bit = BITNO(w->bitno); + fbit = bit > bitno ? 0 : P_BIT(key, bit); + svbit = fbit ? IS_RIGHT_LEAF(w->bitno) : + IS_LEFT_LEAF(w->bitno); + w = w->lr[fbit]; + if (svbit) + break; + } + } + + sp = (struct T *)w; + + sm = m = sp->str; + k = key; + + /* Check for correct string and return */ + for (cix = 0; *m && *k && *m == *k; m++, k++, cix += CHECKBITS) + ; + if (*m == 0 && *k == 0) { + //if (!enterf && sp->value == NULL) + // return NULL; + return sp; + } + + if (!enterf) + return NULL; /* no string found and do not enter */ + + ix = *m ^ *k; + while ((ix & 1) == 0) + ix >>= 1, cix++; + + /* Create new node */ + new = block_pool_alloc(t->bp, sizeof *new); + bit = P_BIT(key, cix); + new->bitno = cix | (bit ? RIGHT_IS_LEAF : LEFT_IS_LEAF); + new->lr[bit] = T_new(t->bp, key, T_size); + + if (t->count++ == 1) { + new->lr[!bit] = t->root; + new->bitno |= (bit ? LEFT_IS_LEAF : RIGHT_IS_LEAF); + t->root = new; + return (struct T *)new->lr[bit]; + } + + w = t->root; + last = NULL; + for (;;) { + fbit = w->bitno; + bitno = BITNO(w->bitno); + assert(bitno != cix); + if (bitno > cix) + break; + svbit = P_BIT(key, bitno); + last = w; + w = w->lr[svbit]; + if (fbit & (svbit ? RIGHT_IS_LEAF : LEFT_IS_LEAF)) + break; + } + + new->lr[!bit] = w; + if (last == NULL) { + t->root = new; + } else { + last->lr[svbit] = new; + last->bitno &= ~(svbit ? RIGHT_IS_LEAF : LEFT_IS_LEAF); + } + if (bitno < cix) + new->bitno |= (bit ? LEFT_IS_LEAF : RIGHT_IS_LEAF); + return (struct T *)new->lr[bit]; +} diff --git a/ccan/stringmap/stringmap.h b/ccan/stringmap/stringmap.h new file mode 100644 index 00000000..df9fe636 --- /dev/null +++ b/ccan/stringmap/stringmap.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2004 Anders Magnusson (ragge@ludd.luth.se). + * Copyright (c) 2009 Joseph Adams (joeyadams3.14159@gmail.com). + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CCAN_STRINGMAP_H +#define CCAN_STRINGMAP_H + +#include +#include + +#define stringmap(theType) struct {struct stringmap t; struct {char *str; theType value;} *last;} + //the 'last' pointer here is used as a hacky typeof() alternative + +#define stringmap_new(ctx) {{0,0,(struct block_pool*)(ctx)},0} +#define stringmap_init(sm, ctx) do { \ + (sm).t.root = 0; \ + (sm).t.count = 0; \ + (sm).t.bp = (struct block_pool*)(ctx); \ + (sm).last = 0; \ + } while(0) +#define stringmap_free(sm) do { \ + if ((sm).t.root) \ + block_pool_free((sm).t.bp); \ + } while(0) + +#define stringmap_lookup(sm, key) stringmap_le(sm, key, 0) +#define stringmap_enter(sm, key) stringmap_le(sm, key, 1) + +//this macro sets sm.last so it can exploit its type +#define stringmap_le(sm, key, enterf) ((((sm).last) = stringmap_lookup_real(&(sm).t, key, enterf, sizeof(*(sm).last))) ? &(sm).last->value : NULL) + + +struct stringmap_node { + uint32_t bitno; + struct stringmap_node *lr[2]; +}; + +struct stringmap { + struct stringmap_node *root; + size_t count; + struct block_pool *bp; + //hack: 'bp' holds talloc ctx when 'root' is NULL +}; + +void *stringmap_lookup_real(struct stringmap *t, const char *key, int enterf, size_t T_size); + +#endif diff --git a/ccan/stringmap/test/run.c b/ccan/stringmap/test/run.c new file mode 100644 index 00000000..37e98e60 --- /dev/null +++ b/ccan/stringmap/test/run.c @@ -0,0 +1,203 @@ +#include "stringmap/stringmap.h" +#include "stringmap/stringmap.c" + +#include "tap/tap.h" + +static void test_trivial(void) { + stringmap(int) map = stringmap_new(NULL); + + ok1(stringmap_lookup(map, "one") == NULL); + *stringmap_enter(map, "one") = 1; + + ok1(stringmap_lookup(map, "two") == NULL); + *stringmap_enter(map, "two") = 2; + + ok1(stringmap_lookup(map, "three") == NULL); + *stringmap_enter(map, "three") = 3; + + ok1(stringmap_lookup(map, "four") == NULL); + *stringmap_enter(map, "four") = 4; + + ok1(*stringmap_lookup(map, "three") == 3); + ok1(*stringmap_lookup(map, "one") == 1); + ok1(*stringmap_lookup(map, "four") == 4); + ok1(*stringmap_lookup(map, "two") == 2); + + ok1(map.t.count == 4); + + stringmap_free(map); +} + + +static void scramble(void *base, size_t nmemb, size_t size) { + char *i = base; + char *o; + size_t sd; + for (;nmemb>1;nmemb--) { + o = i + size*(random()%nmemb); + for (sd=size;sd--;) { + char tmp = *o; + *o++ = *i; + *i++ = tmp; + } + } +} + +//#define RANDOM_STRING_READABLE + +static char *random_string(struct block_pool *bp) { + size_t len = random() % 100; + char *str = block_pool_alloc(bp, len+1); + char *i; + + for (i=str; len--; i++) { + #ifndef RANDOM_STRING_READABLE + char c = random(); + *i = c ? c : ' '; + #else + //only generate characters [32,126] + char c = random()%95 + 32; + *i = c; + #endif + } + *i = 0; + + return str; +} + +struct test_entry { + const char *str; + char *value; +}; + +static int by_str(const void *ap, const void *bp) { + return strcmp(((struct test_entry*)ap)->str, ((struct test_entry*)bp)->str); +} + +static void cull_duplicates(struct test_entry *entries, size_t *count) { + struct test_entry *i, *o, *e = entries + *count; + + qsort(entries, *count, sizeof(*entries), by_str); + + for (i=entries, o=entries; ientries) { + const char *last = i[-1].str; + if (!strcmp(last, i->str)) { + do i++; while(istr)); + continue; + } + } + + //write all entries with the same value (should also have same string) + { + char *value = i->value; + do *o++ = *i++; while(ivalue == value); + } + } + + *count = o-entries; +} + +static int test_stringmap(size_t count, FILE *out) { + stringmap(char*) map = stringmap_new(NULL); + + #define print(tag, fmt, ...) do { \ + if (out) \ + fprintf(out, tag fmt "\n", ##__VA_ARGS__); \ + } while(0) + #define err(...) do { \ + print("error: ", __VA_ARGS__); \ + goto fail; \ + } while(0) + #define debug(...) print("debug: ", __VA_ARGS__) + #define msg(...) print("info: ", __VA_ARGS__) + + struct block_pool *bp = block_pool_new(NULL); + struct test_entry *entries = block_pool_alloc(bp, sizeof(*entries) * count); + struct test_entry *i, *e = entries+count; + char *value_base = block_pool_alloc(bp, count), *value = value_base; + size_t unique_count = 0; + + //we use value to track whether an entry has been added or not + memset(value, 0, count); + + msg("Generating %zu test entries...", count); + + for (i=entries; istr = str; + i->value = value; + } + } + + cull_duplicates(entries, &count); + e = entries+count; + scramble(entries, count, sizeof(*entries)); + + msg("Inserting/looking up %zu entries...", count); + + for (i=entries; istr); + + node = stringmap_lookup(map, i->str); + + if (!node) { + if (*i->value) + err("Previously inserted entry not found"); + + debug("Not found; entering"); + + node = stringmap_enter(map, i->str); + if (!node || strcmp(i->str, map.last->str)) + err("Node not properly entered"); + *node = i->value; + *i->value = 1; //mark that the entry is entered + + unique_count++; + } else { + if (strcmp(i->value, map.last->value)) + err("lookup returned incorrect string"); + if (i->value != *node) + err("lookup returned incorrect value"); + if (!**node) + err("lookup returned bogus value"); + } + } + + if (map.t.count != unique_count) + err("Map has incorrect count"); + + printf("stringmap test passed after %zu inserts, %zu lookups (%zu total operations)\n", unique_count, (i-entries)-unique_count, i-entries); + + block_pool_free(bp); + stringmap_free(map); + return 1; + +fail: + printf("stringmap test failed after %zu inserts, %zu lookups (%zu total operations)\n", unique_count, (i-entries)-unique_count, i-entries); + + block_pool_free(bp); + stringmap_free(map); + return 0; + + #undef print + #undef err + #undef debug + #undef msg +} + +int main(void) +{ + plan_tests(10); + + test_trivial(); + ok1(test_stringmap(10000, NULL)); + + return exit_status(); +} -- 2.39.2