From 581af9029888f99c663ea4554e23961a1933aca2 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 4 May 2010 11:51:10 +0930 Subject: [PATCH] Hashtable routines. --- ccan/hashtable/_info | 116 +++++++++++++++++ ccan/hashtable/hashtable.c | 201 ++++++++++++++++++++++++++++++ ccan/hashtable/hashtable.h | 74 +++++++++++ ccan/hashtable/test/run.c | 248 +++++++++++++++++++++++++++++++++++++ 4 files changed, 639 insertions(+) create mode 100644 ccan/hashtable/_info create mode 100644 ccan/hashtable/hashtable.c create mode 100644 ccan/hashtable/hashtable.h create mode 100644 ccan/hashtable/test/run.c diff --git a/ccan/hashtable/_info b/ccan/hashtable/_info new file mode 100644 index 00000000..4bc4cc4d --- /dev/null +++ b/ccan/hashtable/_info @@ -0,0 +1,116 @@ +#include + +/** + * hashtable - hash table routines + * + * A hash table is an efficient structure for looking up keys. This version + * grows with usage and allows efficient deletion. + * + * Caveat: pointers placed into the hash must be aligned to 64 bits. + * ie. from malloc or because the structure they point to is aligned. + * An assert will be raised if this isn't true. + * + * Example: + * #include + * #include + * #include + * #include + * #include + * + * struct name_to_digit { + * const char *name; + * unsigned int val; + * }; + * + * static struct name_to_digit map[] = { + * { "zero", 0}, + * { "one", 1 }, + * { "two", 2 }, + * { "three", 3 }, + * { "four", 4 }, + * { "five", 5 }, + * { "six", 6 }, + * { "seven", 7 }, + * { "eight", 8 }, + * { "nine", 9 } + * }; + * + * // Wrapper for rehash function pointer. + * static unsigned long rehash(const void *e, void *unused) + * { + * return hash_string(((struct name_to_digit *)e)->name); + * } + * + * // Comparison function. + * static bool streq(const void *e, void *string) + * { + * return strcmp(((struct name_to_digit *)e)->name, string) == 0; + * } + * + * // We let them add their own aliases, eg. --alias=v=5 + * static void add_alias(struct hashtable *ht, const char *alias) + * { + * char *eq; + * struct name_to_digit *n; + * + * n = malloc(sizeof(*n)); + * n->name = strdup(alias); + * + * eq = strchr(n->name, '='); + * if (!eq || ((n->val = atoi(eq+1)) == 0 && !strcmp(eq+1, "0"))) + * errx(1, "Usage: --alias=="); + * *eq = '\0'; + * hashtable_add(ht, hash_string(n->name), n); + * } + * + * int main(int argc, char *argv[]) + * { + * struct hashtable *ht; + * unsigned int i; + * unsigned long val; + * + * if (argc < 2) + * errx(1, "Usage: %s [--alias==]... ...", + * argv[0]); + * + * // Create and populate hash table. + * ht = hashtable_new(rehash, NULL); + * for (i = 0; i < sizeof(map)/sizeof(map[0]); i++) + * hashtable_add(ht, hash_string(map[i].name), &map[i]); + * + * // Add any aliases to the hash table. + * for (i = 1; i < argc; i++) { + * if (!strncmp(argv[i], "--alias=", strlen("--alias="))) + * add_alias(ht, argv[i] + strlen("--alias=")); + * else + * break; + * } + * + * // Find the other args in the hash table. + * for (val = 0; i < argc; i++) { + * struct name_to_digit *n; + * n = hashtable_find(ht, hash_string(argv[i]), + * streq, argv[i]); + * if (!n) + * errx(1, "Invalid digit name %s", argv[i]); + * // Append it to the value we are building up. + * val *= 10; + * val += n->val; + * } + * printf("%lu\n", val); + * return 0; + * } + * + * Licence: GPLv2 (or later) + */ +int main(int argc, char *argv[]) +{ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) { + return 0; + } + + return 1; +} diff --git a/ccan/hashtable/hashtable.c b/ccan/hashtable/hashtable.c new file mode 100644 index 00000000..46f85ac5 --- /dev/null +++ b/ccan/hashtable/hashtable.c @@ -0,0 +1,201 @@ +#include +#include +#include +#include + +/* This means a struct hashtable takes at least 512 bytes / 1k (32/64 bits). */ +#define HASHTABLE_BASE_BITS 7 + +/* We use 0x1 as deleted marker. */ +#define HASHTABLE_DELETED ((void *)0x1) + +/* Number of redundant bits in a pointer. */ +#define PTR_STEAL_BITS 3 + +static inline uintptr_t get_extra_ptr_bits(const void *p) +{ + return (uintptr_t)p & (((uintptr_t)1 << PTR_STEAL_BITS)-1); +} + +static inline void *get_raw_ptr(const void *p) +{ + return (void *)((uintptr_t)p & ~(((uintptr_t)1 << PTR_STEAL_BITS)-1)); +} + +static inline void *make_ptr(const void *p, uintptr_t bits) +{ + return (void *)((uintptr_t)p | bits); +} + +struct hashtable { + unsigned long (*rehash)(const void *elem, void *priv); + void *priv; + unsigned int bits; + unsigned long elems, max; + void **table; +}; + +static inline bool ptr_is_valid(const void *e) +{ + return e > HASHTABLE_DELETED; +} + +struct hashtable *hashtable_new(unsigned long (*rehash)(const void *elem, + void *priv), + void *priv) +{ + struct hashtable *ht = malloc(sizeof(struct hashtable)); + if (ht) { + ht->bits = HASHTABLE_BASE_BITS; + ht->rehash = rehash; + ht->priv = priv; + ht->elems = 0; + ht->max = (1 << ht->bits) * 3 / 4; + ht->table = calloc(1 << ht->bits, sizeof(void *)); + if (!ht->table) { + free(ht); + ht = NULL; + } + } + return ht; +} + +void hashtable_free(struct hashtable *ht) +{ + free(ht->table); + free(ht); +} + +static unsigned long hash_bucket(const struct hashtable *ht, + unsigned long h, unsigned long *ptrbits) +{ + *ptrbits = (h >> ht->bits) & (((uintptr_t)1 << PTR_STEAL_BITS)-1); + return h & ((1 << ht->bits)-1); +} + +/* This does not expand the hash table, that's up to caller. */ +static void ht_add(struct hashtable *ht, const void *new, unsigned long h) +{ + unsigned long i, h2; + + i = hash_bucket(ht, h, &h2); + + while (ptr_is_valid(ht->table[i])) { + i = (i + 1) & ((1 << ht->bits)-1); + } + ht->table[i] = make_ptr(new, h2); +} + +static bool double_table(struct hashtable *ht) +{ + unsigned int i; + size_t oldnum = (size_t)1 << ht->bits; + void **oldtable, *e; + + oldtable = ht->table; + ht->table = calloc(1 << (ht->bits+1), sizeof(void *)); + if (!ht->table) { + ht->table = oldtable; + return false; + } + ht->bits++; + ht->max *= 2; + + for (i = 0; i < oldnum; i++) { + if (ptr_is_valid(e = oldtable[i])) { + e = get_raw_ptr(e); + ht_add(ht, e, ht->rehash(e, ht->priv)); + } + } + free(oldtable); + return true; +} + +bool hashtable_add(struct hashtable *ht, unsigned long hash, const void *p) +{ + if (ht->elems+1 > ht->max && !double_table(ht)) + return false; + ht->elems++; + assert(p); + assert(((uintptr_t)p & (((uintptr_t)1 << PTR_STEAL_BITS)-1)) == 0); + ht_add(ht, p, hash); + return true; +} + +/* Find bucket for an element. */ +static size_t ht_find(struct hashtable *ht, + unsigned long hash, + bool (*cmp)(const void *htelem, void *cmpdata), + void *cmpdata) +{ + void *e; + unsigned long i, h2; + + i = hash_bucket(ht, hash, &h2); + + while ((e = ht->table[i]) != NULL) { + if (e != HASHTABLE_DELETED + && h2 == get_extra_ptr_bits(e) + && cmp(get_raw_ptr(ht->table[i]), cmpdata)) + return i; + i = (i + 1) & ((1 << ht->bits)-1); + } + return (size_t)-1; +} + + +/* If every one of the following buckets are DELETED (up to the next unused + one), we can actually mark them all unused. */ +static void delete_run(struct hashtable *ht, unsigned int num) +{ + unsigned int i, last = num + 1; + + while (ht->table[last & ((1 << ht->bits)-1)]) { + if (ptr_is_valid(ht->table[last & ((1 << ht->bits)-1)])) + return; + last++; + } + for (i = num; i < last; i++) + ht->table[i & ((1 << ht->bits)-1)] = NULL; +} + +void *hashtable_find(struct hashtable *ht, + unsigned long hash, + bool (*cmp)(const void *htelem, void *cmpdata), + void *cmpdata) +{ + size_t i = ht_find(ht, hash, cmp, cmpdata); + if (i != (size_t)-1) + return get_raw_ptr(ht->table[i]); + return NULL; +} + +static bool ptr_cmp(const void *htelem, void *cmpdata) +{ + return htelem == cmpdata; +} + +bool hashtable_del(struct hashtable *ht, unsigned long hash, const void *p) +{ + size_t i = ht_find(ht, hash, ptr_cmp, (void *)p); + if (i == (size_t)-1) + return false; + + ht->elems--; + ht->table[i] = HASHTABLE_DELETED; + delete_run(ht, i); + return true; +} + + +void hashtable_traverse(struct hashtable *ht, bool (*cb)(void *p, void *cbarg), + void *cbarg) +{ + size_t i; + + for (i = 0; i < (1 << ht->bits); i++) { + if (ptr_is_valid(ht->table[i])) + if (cb(get_raw_ptr(ht->table[i]), cbarg)) + return; + } +} diff --git a/ccan/hashtable/hashtable.h b/ccan/hashtable/hashtable.h new file mode 100644 index 00000000..269e942f --- /dev/null +++ b/ccan/hashtable/hashtable.h @@ -0,0 +1,74 @@ +#ifndef CCAN_HASHTABLE_H +#define CCAN_HASHTABLE_H +#include "config.h" +#include + +struct hashtable; + +/** + * hashtable_new - allocate a hash tree. + * @rehash: hash function to use for rehashing. + * @priv: private argument to @rehash function. + */ +struct hashtable *hashtable_new(unsigned long (*rehash)(const void *elem, + void *priv), + void *priv); + +/** + * hashtable_free - dellocate a hash tree. + * + * This doesn't do anything to any pointers left in it. + */ +void hashtable_free(struct hashtable *); + +/** + * hashtable_find - look for an object in a hash tree. + * @ht: the hashtable + * @hash: the hash value of the object you are seeking + * @cmp: a comparison function: returns true if the object is found + * @cmpdata: the data to hand as second arg to @cmp + * + * Note that you can do all the work inside the cmp function if you want to: + * hashtable_find will return @htelem if @cmp returns true. + */ +void *hashtable_find(struct hashtable *ht, + unsigned long hash, + bool (*cmp)(const void *htelem, void *cmpdata), + void *cmpdata); + +/** + * hashtable_add - add an (aligned) pointer into a hash tree. + * @ht: the hashtable + * @hash: the hash value of the object + * @p: the non-NULL pointer, usually from malloc or equiv. + * + * Note that this implementation (ab)uses the lower bits of the pointer, so + * it will abort if the pointer is not aligned to 8 bytes. + * + * Also note that this can only fail due to allocation failure. Otherwise, it + * returns true. + */ +bool hashtable_add(struct hashtable *ht, unsigned long hash, const void *p); + +/** + * hashtable_del - remove a pointer from a hash tree + * @ht: the hashtable + * @hash: the hash value of the object + * @p: the pointer + * + * Returns true if the pointer was found (and deleted). + */ +bool hashtable_del(struct hashtable *ht, unsigned long hash, const void *p); + +/** + * hashtable_traverse - call a function on every pointer in hash tree + * @ht: the hashtable + * @cb: the callback: returns true to abort traversal. + * @cbarg: the argument to the callback + * + * Note that your traversal callback may delete any entry (it won't crash), + * but it may make the traverse unreliable. + */ +void hashtable_traverse(struct hashtable *ht, bool (*cb)(void *p, void *cbarg), + void *cbarg); +#endif /* CCAN_HASHTABLE_H */ diff --git a/ccan/hashtable/test/run.c b/ccan/hashtable/test/run.c new file mode 100644 index 00000000..e072cae7 --- /dev/null +++ b/ccan/hashtable/test/run.c @@ -0,0 +1,248 @@ +#include +#include +#include +#include +#include + +#define NUM_VALS (1 << HASHTABLE_BASE_BITS) + +/* We use the number divided by two as the hash (for lots of + collisions), plus set all the higher bits so we can detect if they + don't get masked out. */ +static unsigned long hash(const void *elem, void *unused) +{ + unsigned long h = *(uint64_t *)elem / 2; + h |= -1UL << HASHTABLE_BASE_BITS; + return h; +} + +static bool objcmp(const void *htelem, void *cmpdata) +{ + return *(uint64_t *)htelem == *(uint64_t *)cmpdata; +} + +static void add_vals(struct hashtable *ht, + const uint64_t val[], unsigned int num) +{ + uint64_t i; + + for (i = 0; i < num; i++) { + if (hashtable_find(ht, hash(&i, NULL), objcmp, &i)) { + fail("%llu already in hash", (long long)i); + return; + } + hashtable_add(ht, hash(&val[i], NULL), &val[i]); + if (hashtable_find(ht, hash(&i, NULL), objcmp, &i) != &val[i]) { + fail("%llu not added to hash", (long long)i); + return; + } + } + pass("Added %llu numbers to hash", (long long)i); +} + +static void refill_vals(struct hashtable *ht, + const uint64_t val[], unsigned int num) +{ + uint64_t i; + + for (i = 0; i < num; i++) { + if (hashtable_find(ht, hash(&i, NULL), objcmp, &i)) + continue; + hashtable_add(ht, hash(&val[i], NULL), &val[i]); + } +} + +static void find_vals(struct hashtable *ht, + const uint64_t val[], unsigned int num) +{ + uint64_t i; + + for (i = 0; i < num; i++) { + if (hashtable_find(ht, hash(&i, NULL), objcmp, &i) != &val[i]) { + fail("%llu not found in hash", (long long)i); + return; + } + } + pass("Found %llu numbers in hash", (long long)i); +} + +static void del_vals(struct hashtable *ht, + const uint64_t val[], unsigned int num) +{ + uint64_t i; + + for (i = 0; i < num; i++) { + if (!hashtable_del(ht, hash(&val[i], NULL), &val[i])) { + fail("%llu not deleted from hash", (long long)i); + return; + } + } + pass("Deleted %llu numbers in hash", (long long)i); +} + +struct travarg { + unsigned int count; + struct hashtable *ht; + char touched[NUM_VALS]; + uint64_t *val; +}; + +static bool count(void *p, void *cbarg) +{ + struct travarg *travarg = cbarg; + travarg->count++; + travarg->touched[*(uint64_t *)p]++; + return false; +} + +static bool delete_self(void *p, void *cbarg) +{ + struct travarg *travarg = cbarg; + uint64_t val = *(uint64_t *)p; + + travarg->count++; + travarg->touched[val]++; + return !hashtable_del(travarg->ht, hash(p, NULL), p); +} + +static bool delete_next(void *p, void *cbarg) +{ + struct travarg *travarg = cbarg; + uint64_t val = *(uint64_t *)p; + uint64_t *next = &travarg->val[(val + 1) % NUM_VALS]; + + travarg->count++; + travarg->touched[val]++; + return !hashtable_del(travarg->ht, hash(next, NULL), next); +} + +static bool delete_prev(void *p, void *cbarg) +{ + struct travarg *travarg = cbarg; + uint64_t val = *(uint64_t *)p; + uint64_t *prev = &travarg->val[(val - 1) % NUM_VALS]; + + travarg->count++; + travarg->touched[val]++; + return !hashtable_del(travarg->ht, hash(prev, NULL), prev); +} + +static bool stop_halfway(void *p, void *cbarg) +{ + struct travarg *travarg = cbarg; + travarg->count++; + travarg->touched[*(uint64_t *)p]++; + + return (travarg->count == NUM_VALS / 2); +} + +static void check_all_touched_once(struct travarg *travarg) +{ + unsigned int i; + + for (i = 0; i < NUM_VALS; i++) { + if (travarg->touched[i] != 1) { + fail("Value %u touched %u times", + i, travarg->touched[i]); + return; + } + } + pass("All values touched once"); +} + +static void check_only_touched_once(struct travarg *travarg) +{ + unsigned int i; + + for (i = 0; i < NUM_VALS; i++) { + if (travarg->touched[i] > 1) { + fail("Value %u touched multiple (%u) times", + i, travarg->touched[i]); + return; + } + } + pass("No value touched twice"); +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct hashtable *ht; + uint64_t val[NUM_VALS]; + struct travarg travarg; + uint64_t dne; + + plan_tests(20); + for (i = 0; i < NUM_VALS; i++) + val[i] = i; + dne = i; + + ht = hashtable_new(hash, NULL); + ok1(ht->max < (1 << ht->bits)); + ok1(ht->bits == HASHTABLE_BASE_BITS); + + /* We cannot find an entry which doesn't exist. */ + ok1(!hashtable_find(ht, hash(&dne, NULL), objcmp, &dne)); + + /* Fill it, it should increase in size (once). */ + add_vals(ht, val, NUM_VALS); + ok1(ht->bits == HASHTABLE_BASE_BITS + 1); + ok1(ht->max < (1 << ht->bits)); + + /* Find all. */ + find_vals(ht, val, NUM_VALS); + ok1(!hashtable_find(ht, hash(&dne, NULL), objcmp, &dne)); + + /* Delete all. */ + del_vals(ht, val, NUM_VALS); + ok1(!hashtable_find(ht, hash(&val[0], NULL), objcmp, &val[0])); + + /* Refill. */ + refill_vals(ht, val, NUM_VALS); + + /* Traverse tests. */ + travarg.ht = ht; + travarg.val = val; + memset(travarg.touched, 0, sizeof(travarg.touched)); + travarg.count = 0; + + /* Traverse. */ + hashtable_traverse(ht, count, &travarg); + ok1(travarg.count == NUM_VALS); + check_all_touched_once(&travarg); + + memset(travarg.touched, 0, sizeof(travarg.touched)); + travarg.count = 0; + hashtable_traverse(ht, stop_halfway, &travarg); + ok1(travarg.count == NUM_VALS / 2); + check_only_touched_once(&travarg); + + memset(travarg.touched, 0, sizeof(travarg.touched)); + travarg.count = 0; + i = 0; + /* Delete until we make no more progress. */ + for (;;) { + hashtable_traverse(ht, delete_self, &travarg); + if (travarg.count == i || travarg.count > NUM_VALS) + break; + i = travarg.count; + } + ok1(travarg.count == NUM_VALS); + check_all_touched_once(&travarg); + + memset(travarg.touched, 0, sizeof(travarg.touched)); + travarg.count = 0; + refill_vals(ht, val, NUM_VALS); + hashtable_traverse(ht, delete_next, &travarg); + ok1(travarg.count <= NUM_VALS); + check_only_touched_once(&travarg); + + memset(travarg.touched, 0, sizeof(travarg.touched)); + travarg.count = 0; + refill_vals(ht, val, NUM_VALS); + hashtable_traverse(ht, delete_prev, &travarg); + ok1(travarg.count <= NUM_VALS); + check_only_touched_once(&travarg); + + return exit_status(); +} -- 2.39.2