From: Rusty Russell Date: Wed, 28 Sep 2011 02:37:35 +0000 (+0930) Subject: strmap: new module for ordered map of strings using a critbit tree. X-Git-Url: https://git.ozlabs.org/?p=ccan;a=commitdiff_plain;h=20f3b260313fb4d5566aeb0d0c5439574e914e2d strmap: new module for ordered map of strings using a critbit tree. --- diff --git a/ccan/strmap/_info b/ccan/strmap/_info new file mode 100644 index 00000000..9832f80d --- /dev/null +++ b/ccan/strmap/_info @@ -0,0 +1,62 @@ +#include +#include "config.h" + +/** + * strmap - an ordered map of strings to values + * + * This code implements an ordered map of strings as a critbit tree. See: + * + * http://cr.yp.to/critbit.html + * http://github.com/agl/critbit (which this code is based on) + * + * License: Public domain (but some dependencies are LGPL!) + * Author: Rusty Russell + * Ccanlint: + * license_depends_compat FAIL + * + * Example: + * #include + * #include + * + * static bool dump(const char *member, size_t value, void *unused) + * { + * printf("%s at %zu. ", member, value); + * // false means keep going with iteration. + * return false; + * } + * + * int main(int argc, char *argv[]) + * { + * size_t i; + * struct { STRMAP_MEMBERS(size_t); } map; + * + * strmap_init(&map); + * for (i = 1; i < argc; i++) + * // This only adds the first time for this arg. + * strmap_add(&map, argv[i], i); + * + * strmap_iterate(&map, dump, NULL); + * printf("\n"); + * return 0; + * } + * // Given 'foo' outputs 'foo at 1. ' + * // Given 'foo bar' outputs 'bar at 2. foo at 1. ' + * // Given 'foo foo bar zebra' outputs 'bar at 3. foo at 1. zebra at 4. ' + */ +int main(int argc, char *argv[]) +{ + /* Expect exactly one argument */ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) { + printf("ccan/ilog\n" + "ccan/short_types\n" + "ccan/str\n" + "ccan/tcon\n" + "ccan/typesafe_cb\n"); + return 0; + } + + return 1; +} diff --git a/ccan/strmap/strmap.c b/ccan/strmap/strmap.c new file mode 100644 index 00000000..7d03cca4 --- /dev/null +++ b/ccan/strmap/strmap.c @@ -0,0 +1,238 @@ +/* This code is based on ccan/strset.c. */ +#include +#include +#include +#include +#include +#include + +struct node { + /* These point to strings or nodes. */ + struct strmap child[2]; + /* The byte number where first bit differs. */ + size_t byte_num; + /* The bit where these children differ. */ + u8 bit_num; +}; + +/* Closest member to this in a non-empty map. */ +static struct strmap *closest(struct strmap *n, const char *member) +{ + size_t len = strlen(member); + const u8 *bytes = (const u8 *)member; + + /* Anything with NULL value is a node. */ + while (!n->v) { + u8 direction = 0; + + if (n->u.n->byte_num < len) { + u8 c = bytes[n->u.n->byte_num]; + direction = (c >> n->u.n->bit_num) & 1; + } + n = &n->u.n->child[direction]; + } + return n; +} + +void *strmap_get_(const struct strmap *map, const char *member) +{ + struct strmap *n; + + /* Empty map? */ + if (!map->u.n) + return NULL; + n = closest((struct strmap *)map, member); + if (streq(member, n->u.s)) + return n->v; + return NULL; +} + +bool strmap_add_(struct strmap *map, const char *member, const void *value) +{ + size_t len = strlen(member); + const u8 *bytes = (const u8 *)member; + struct strmap *n; + struct node *newn; + size_t byte_num; + u8 bit_num, new_dir; + + assert(value); + + /* Empty map? */ + if (!map->u.n) { + map->u.s = member; + map->v = (void *)value; + return true; + } + + /* Find closest existing member. */ + n = closest(map, member); + + /* Find where they differ. */ + for (byte_num = 0; n->u.s[byte_num] == member[byte_num]; byte_num++) { + if (member[byte_num] == '\0') { + /* All identical! */ + return false; + } + } + + /* Find which bit differs (if we had ilog8, we'd use it) */ + bit_num = ilog32_nz((u8)n->u.s[byte_num] ^ bytes[byte_num]) - 1; + assert(bit_num < CHAR_BIT); + + /* Which direction do we go at this bit? */ + new_dir = ((bytes[byte_num]) >> bit_num) & 1; + + /* Allocate new node. */ + newn = malloc(sizeof(*newn)); + if (!newn) { + /* FIXME */ + return false; + } + newn->byte_num = byte_num; + newn->bit_num = bit_num; + newn->child[new_dir].v = (void *)value; + newn->child[new_dir].u.s = member; + + /* Find where to insert: not closest, but first which differs! */ + n = map; + while (!n->v) { + u8 direction = 0; + + if (n->u.n->byte_num > byte_num) + break; + /* Subtle: bit numbers are "backwards" for comparison */ + if (n->u.n->byte_num == byte_num && n->u.n->bit_num < bit_num) + break; + + if (n->u.n->byte_num < len) { + u8 c = bytes[n->u.n->byte_num]; + direction = (c >> n->u.n->bit_num) & 1; + } + n = &n->u.n->child[direction]; + } + + newn->child[!new_dir] = *n; + n->u.n = newn; + n->v = NULL; + return true; +} + +char *strmap_del_(struct strmap *map, const char *member, void **valuep) +{ + size_t len = strlen(member); + const u8 *bytes = (const u8 *)member; + struct strmap *parent = NULL, *n; + const char *ret = NULL; + u8 direction = 0; /* prevent bogus gcc warning. */ + + /* Empty map? */ + if (!map->u.n) + return NULL; + + /* Find closest, but keep track of parent. */ + n = map; + /* Anything with NULL value is a node. */ + while (!n->v) { + u8 c = 0; + + parent = n; + if (n->u.n->byte_num < len) { + c = bytes[n->u.n->byte_num]; + direction = (c >> n->u.n->bit_num) & 1; + } else + direction = 0; + n = &n->u.n->child[direction]; + } + + /* Did we find it? */ + if (!streq(member, n->u.s)) + return NULL; + + ret = n->u.s; + if (valuep) + *valuep = n->v; + + if (!parent) { + /* We deleted last node. */ + map->u.n = NULL; + } else { + struct node *old = parent->u.n; + /* Raise other node to parent. */ + *parent = old->child[!direction]; + free(old); + } + + return (char *)ret; +} + +static bool iterate(struct strmap n, + bool (*handle)(const char *, void *, void *), void *data) +{ + if (n.v) + return handle(n.u.s, n.v, data); + + return iterate(n.u.n->child[0], handle, data) + || iterate(n.u.n->child[1], handle, data); +} + +void strmap_iterate_(const struct strmap *map, + bool (*handle)(const char *, void *, void *), void *data) +{ + /* Empty map? */ + if (!map->u.n) + return; + + iterate(*map, handle, data); +} + +const struct strmap *strmap_prefix_(const struct strmap *map, + const char *prefix) +{ + const struct strmap *n, *top; + size_t len = strlen(prefix); + const u8 *bytes = (const u8 *)prefix; + + /* Empty map -> return empty map. */ + if (!map->u.n) + return map; + + top = n = map; + + /* We walk to find the top, but keep going to check prefix matches. */ + while (!n->v) { + u8 c = 0, direction; + + if (n->u.n->byte_num < len) + c = bytes[n->u.n->byte_num]; + + direction = (c >> n->u.n->bit_num) & 1; + n = &n->u.n->child[direction]; + if (c) + top = n; + } + + if (!strstarts(n->u.s, prefix)) { + /* Convenient return for prefixes which do not appear in map. */ + static const struct strmap empty_map; + return &empty_map; + } + + return top; +} + +static void clear(struct strmap n) +{ + if (!n.v) { + clear(n.u.n->child[0]); + clear(n.u.n->child[1]); + free(n.u.n); + } +} + +void strmap_clear_(struct strmap *map) +{ + if (map->u.n) + clear(*map); + map->u.n = NULL; +} diff --git a/ccan/strmap/strmap.h b/ccan/strmap/strmap.h new file mode 100644 index 00000000..77197507 --- /dev/null +++ b/ccan/strmap/strmap.h @@ -0,0 +1,224 @@ +#ifndef CCAN_STRMAP_H +#define CCAN_STRMAP_H +#include "config.h" +#include +#include +#include +#include + +/** + * struct strmap - representation of a string map + * + * It's exposed here to allow you to embed it and so we can inline the + * trivial functions. + */ +struct strmap { + union { + struct node *n; + const char *s; + } u; + void *v; +}; + +/** + * STRMAP_MEMBERS - declare members for a type-specific strmap. + * @type: type for this map's values, or void * for any pointer. + * + * You use this to create your own typed strmap for a particular type. + * You can use an integer type, *but* remember you can't use "0" as a + * value! + * + * Example: + * struct strmap_intp { + * STRMAP_MEMBERS(int *); + * }; + */ +#define STRMAP_MEMBERS(type) \ + struct strmap raw; \ + TCON(type canary) + + +/** + * strmap_init - initialize a string map (empty) + * @map: the typed strmap to initialize. + * + * For completeness; if you've arranged for it to be NULL already you don't + * need this. + * + * Example: + * struct strmap_intp map; + * + * strmap_init(&map); + */ +#define strmap_init(map) strmap_init_(&(map)->raw) + +static inline void strmap_init_(struct strmap *map) +{ + map->u.n = NULL; +} + +/** + * strmap_empty - is this string map empty? + * @map: the typed strmap to check. + * + * Example: + * if (!strmap_empty(&map)) + * abort(); + */ +#define strmap_empty(map) strmap_empty_(&(map)->raw) + +static inline bool strmap_empty_(const struct strmap *map) +{ + return map->u.n == NULL; +} + +/** + * strmap_get - get a value from a string map + * @map: the typed strmap to search. + * @member: the string to search for. + * + * Returns the value, or NULL if it isn't in the map. + * + * Example: + * int *val = strmap_get(&map, "hello"); + * if (val) + * printf("hello => %i\n", *val); + */ +#define strmap_get(map, member) \ + tcon_cast((map), canary, strmap_get_(&(map)->raw, (member))) +void *strmap_get_(const struct strmap *map, const char *member); + +/** + * strmap_add - place a member in the string map. + * @map: the typed strmap to add to. + * @member: the string to place in the map. + * @v: the (non-NULL) value. + * + * This returns false if we run out of memory, or (more normally) if that + * string already appears in the map. + * + * Note that the pointer is placed in the map, the string is not copied. If + * you want a copy in the map, use strdup(). Similarly for the value. + * + * Example: + * val = malloc(sizeof *val); + * *val = 17; + * if (!strmap_add(&map, "goodbye", val)) + * printf("goodbye was already in the map\n"); + */ +#define strmap_add(map, member, value) \ + strmap_add_(&tcon_check((map), canary, (value))->raw, \ + (member), (void *)(value)) + +bool strmap_add_(struct strmap *map, const char *member, const void *value); + +/** + * strmap_del - remove a member from the string map. + * @map: the typed strmap to delete from. + * @member: the string to remove from the map. + * @valuep: the value (if non-NULL) + * + * This returns the string which was passed to strmap_map(), or NULL. + * This means that if you allocated a string (eg. using strdup()), you + * can free it here. Similarly, the value is returned in @valuep if + * @valuep is not NULL. + * + * Example: + * if (!strmap_del(&map, "goodbye", NULL)) + * printf("goodbye was not in the map?\n"); + */ +#define strmap_del(map, member, valuep) \ + strmap_del_(&tcon_check_ptr((map), canary, valuep)->raw, \ + (member), (void **)valuep) +char *strmap_del_(struct strmap *map, const char *member, void **valuep); + +/** + * strmap_clear - remove every member from the map. + * @map: the typed strmap to clear. + * + * The map will be empty after this. + * + * Example: + * strmap_clear(&map); + */ +#define strmap_clear(map) strmap_clear_(&(map)->raw) + +void strmap_clear_(struct strmap *map); + +/** + * strmap_iterate - ordered iteration over a map + * @map: the typed strmap to iterate through. + * @handle: the function to call. + * @arg: the argument for the function (types should match). + * + * @handle's prototype should be: + * bool @handle(const char *member, type value, typeof(arg) arg) + * + * If @handle returns true, the iteration will stop. + * You should not alter the map within the @handle function! + * + * Example: + * struct strmap_intp { + * STRMAP_MEMBERS(int *); + * }; + * static bool dump_some(const char *member, int *value, int *num) + * { + * // Only dump out num nodes. + * if (*(num--) == 0) + * return true; + * printf("%s=>%i\n", member, *value); + * return false; + * } + * + * static void dump_map(const struct strmap_intp *map) + * { + * int max = 100; + * strmap_iterate(map, dump_some, &max); + * if (max < 0) + * printf("... (truncated to 100 entries)\n"); + * } + */ +#define strmap_iterate(map, handle, arg) \ + strmap_iterate_(&(map)->raw, \ + typesafe_cb_cast(bool (*)(const char *, \ + void *, void *), \ + bool (*)(const char *, \ + tcon_type((map), canary), \ + __typeof__(arg)), (handle)), \ + (arg)) +void strmap_iterate_(const struct strmap *map, + bool (*handle)(const char *, void *, void *), void *data); + + +/** + * strmap_prefix - return a submap matching a prefix + * @map: the map. + * @prefix: the prefix. + * + * This returns a pointer into @map, so don't alter @map while using + * the return value. You can use strmap_iterate(), strmap_get() or + * strmap_empty() on the returned pointer. + * + * Example: + * static void dump_prefix(const struct strmap_intp *map, + * const char *prefix) + * { + * int max = 100; + * printf("Nodes with prefix %s:\n", prefix); + * strmap_iterate(strmap_prefix(map, prefix), dump_some, &max); + * if (max < 0) + * printf("... (truncated to 100 entries)\n"); + * } + */ +#if HAVE_TYPEOF +#define strmap_prefix(map, prefix) \ + ((const __typeof__(map))strmap_prefix_(&(map)->raw, (prefix))) +#else +#define strmap_prefix(map, prefix) \ + ((const void *)strmap_prefix_(&(map)->raw, (prefix))) +#endif + +const struct strmap *strmap_prefix_(const struct strmap *map, + const char *prefix); + +#endif /* CCAN_STRMAP_H */ diff --git a/ccan/strmap/test/run-order.c b/ccan/strmap/test/run-order.c new file mode 100644 index 00000000..417a7406 --- /dev/null +++ b/ccan/strmap/test/run-order.c @@ -0,0 +1,96 @@ +#include +#include +#include +#include + +#define NUM 1000 + +static bool in_order(const char *member, char *value, unsigned int *count) +{ + int i = atoi(member); + ok1(i == atoi(value)); + ok1(*count == i); + (*count)++; + return false; +} + +static bool in_order_by_2(const char *member, char *value, unsigned int *count) +{ + int i = atoi(member); + ok1(i == atoi(value)); + ok1(*count == i); + (*count) += 2; + return false; +} + +static bool dump(const char *member, char *value, bool *ok) +{ + diag("%s=>%s", member, value); + if (value != member + 1) + *ok = false; + return false; +} + +int main(void) +{ + struct strmap_charp { + STRMAP_MEMBERS(char *); + } map; + unsigned int i; + char *str[NUM]; + bool dump_ok; + + plan_tests(1 + NUM * 4 + 3 * NUM); + strmap_init(&map); + + for (i = 0; i < NUM; i++) { + char template[10]; + sprintf(template, "%08u", i); + str[i] = strdup(template); + } + + for (i = 0; i < NUM; i++) + strmap_add(&map, str[i], str[i]+1); + + dump_ok = true; + strmap_iterate(&map, dump, &dump_ok); + ok1(dump_ok); + + /* Iterate. */ + i = 0; + strmap_iterate(&map, in_order, &i); + + /* Preserve order after deletion. */ + for (i = 0; i < NUM; i += 2) { + char *v; + ok1(strmap_del(&map, str[i], &v) == str[i]); + ok1(v == str[i] + 1); + } + + i = 1; + strmap_iterate(&map, in_order_by_2, &i); + + for (i = 1; i < NUM; i += 2) { + char *v; + ok1(strmap_del(&map, str[i], &v) == str[i]); + ok1(v == str[i] + 1); + } + + /* empty traverse. */ + strmap_iterate(&map, in_order_by_2, (unsigned int *)NULL); + + /* insert backwards, should be fine. */ + for (i = 0; i < NUM; i++) + strmap_add(&map, str[NUM-1-i], str[NUM-1-i]+1); + + i = 0; + strmap_iterate(&map, in_order, &i); + + strmap_clear(&map); + + for (i = 0; i < NUM; i++) + free(str[i]); + + /* This exits depending on whether all tests passed */ + return exit_status(); +} diff --git a/ccan/strmap/test/run-prefix.c b/ccan/strmap/test/run-prefix.c new file mode 100644 index 00000000..f6eb1779 --- /dev/null +++ b/ccan/strmap/test/run-prefix.c @@ -0,0 +1,97 @@ +#include +#include +#include +#include + +/* Must be > 100, see below. */ +#define NUM 200 + +static bool in_order(const char *index, char *value, unsigned int *count) +{ + int i = atoi(index); + ok1(i == atoi(value)); + ok1(*count == i); + (*count)++; + return false; +} + +static bool find_empty(const char *index, char *value, char *empty) +{ + if (index == empty) + pass("Found empty entry!"); + return false; +} + +int main(void) +{ + struct map { + STRMAP_MEMBERS(char *); + }; + struct map map; + const struct map *sub; + unsigned int i; + char *str[NUM], *empty; + + plan_tests(8 + 2 * (1 + 10 + 100) + 1); + strmap_init(&map); + + for (i = 0; i < NUM; i++) { + char template[10]; + sprintf(template, "%08u", i); + str[i] = strdup(template); + } + + /* All prefixes of an empty map are empty. */ + sub = strmap_prefix(&map, "a"); + ok1(strmap_empty(sub)); + sub = strmap_prefix(&map, ""); + ok1(strmap_empty(sub)); + + for (i = 0; i < NUM; i++) + strmap_add(&map, str[i], str[i]+1); + + /* Nothing */ + sub = strmap_prefix(&map, "a"); + ok1(strmap_empty(sub)); + + /* Everything */ + sub = strmap_prefix(&map, "0"); + ok1(sub->raw.u.n == map.raw.u.n); + sub = strmap_prefix(&map, ""); + ok1(sub->raw.u.n == map.raw.u.n); + + /* Single. */ + sub = strmap_prefix(&map, "00000000"); + i = 0; + strmap_iterate(sub, in_order, &i); + ok1(i == 1); + + /* First 10. */ + sub = strmap_prefix(&map, "0000000"); + i = 0; + strmap_iterate(sub, in_order, &i); + ok1(i == 10); + + /* First 100. */ + sub = strmap_prefix(&map, "000000"); + i = 0; + strmap_iterate(sub, in_order, &i); + ok1(i == 100); + + /* Everything, *plus* empty string. */ + empty = strdup(""); + strmap_add(&map, empty, empty); + + sub = strmap_prefix(&map, ""); + /* Check we get *our* empty string back! */ + strmap_iterate(sub, find_empty, empty); + + strmap_clear(&map); + + for (i = 0; i < NUM; i++) + free(str[i]); + free(empty); + + /* This exits depending on whether all tests passed */ + return exit_status(); +} diff --git a/ccan/strmap/test/run.c b/ccan/strmap/test/run.c new file mode 100644 index 00000000..349ef4f7 --- /dev/null +++ b/ccan/strmap/test/run.c @@ -0,0 +1,69 @@ +#include +#include +#include + +int main(void) +{ + struct strmap_charp { + STRMAP_MEMBERS(char *); + } map; + const char str[] = "hello"; + const char val[] = "there"; + const char none[] = ""; + char *dup = strdup(str); + char *v; + + /* This is how many tests you plan to run */ + plan_tests(31); + + strmap_init(&map); + + ok1(!strmap_get(&map, str)); + ok1(!strmap_get(&map, none)); + ok1(!strmap_del(&map, str, NULL)); + ok1(!strmap_del(&map, none, NULL)); + + ok1(strmap_add(&map, str, val)); + ok1(strmap_get(&map, str) == val); + /* We compare the string, not the pointer. */ + ok1(strmap_get(&map, dup) == val); + ok1(!strmap_get(&map, none)); + + /* Add a duplicate should fail. */ + ok1(!strmap_add(&map, dup, val)); + ok1(strmap_get(&map, dup) == val); + + /* Delete should return original string. */ + ok1(strmap_del(&map, dup, &v) == str); + ok1(v == val); + ok1(!strmap_get(&map, str)); + ok1(!strmap_get(&map, none)); + + /* Try insert and delete of empty string. */ + ok1(strmap_add(&map, none, none)); + ok1(strmap_get(&map, none) == none); + ok1(!strmap_get(&map, str)); + + /* Delete should return original string. */ + ok1(strmap_del(&map, "", &v) == none); + ok1(v == none); + ok1(!strmap_get(&map, str)); + ok1(!strmap_get(&map, none)); + + /* Both at once... */ + ok1(strmap_add(&map, none, none)); + ok1(strmap_add(&map, str, val)); + ok1(strmap_get(&map, str) == val); + ok1(strmap_get(&map, none) == none); + ok1(strmap_del(&map, "does not exist", NULL) == NULL); + ok1(strmap_del(&map, "", NULL) == none); + ok1(strmap_get(&map, str) == val); + ok1(strmap_del(&map, dup, &v) == str); + ok1(v == val); + + ok1(strmap_empty(&map)); + free(dup); + + /* This exits depending on whether all tests passed */ + return exit_status(); +}