Hashtable routines.
authorRusty Russell <rusty@rustcorp.com.au>
Tue, 4 May 2010 02:21:10 +0000 (11:51 +0930)
committerRusty Russell <rusty@rustcorp.com.au>
Tue, 4 May 2010 02:21:10 +0000 (11:51 +0930)
ccan/hashtable/_info [new file with mode: 0644]
ccan/hashtable/hashtable.c [new file with mode: 0644]
ccan/hashtable/hashtable.h [new file with mode: 0644]
ccan/hashtable/test/run.c [new file with mode: 0644]

diff --git a/ccan/hashtable/_info b/ccan/hashtable/_info
new file mode 100644 (file)
index 0000000..4bc4cc4
--- /dev/null
@@ -0,0 +1,116 @@
+#include <string.h>
+
+/**
+ * hashtable - hash table routines
+ *
+ * A hash table is an efficient structure for looking up keys.  This version
+ * grows with usage and allows efficient deletion.
+ *
+ * Caveat: pointers placed into the hash must be aligned to 64 bits.
+ * ie. from malloc or because the structure they point to is aligned.
+ * An assert will be raised if this isn't true.
+ *
+ * Example:
+ *     #include <ccan/hashtable/hashtable.h>
+ *     #include <ccan/hash/hash.h>
+ *     #include <stdio.h>
+ *     #include <err.h>
+ *     #include <string.h>
+ *     
+ *     struct name_to_digit {
+ *             const char *name;
+ *             unsigned int val;
+ *     };
+ *     
+ *     static struct name_to_digit map[] = { 
+ *             { "zero", 0},
+ *             { "one", 1 },
+ *             { "two", 2 },
+ *             { "three", 3 },
+ *             { "four", 4 },
+ *             { "five", 5 },
+ *             { "six", 6 },
+ *             { "seven", 7 },
+ *             { "eight", 8 },
+ *             { "nine", 9 }
+ *     };
+ *     
+ *     // Wrapper for rehash function pointer.
+ *     static unsigned long rehash(const void *e, void *unused)
+ *     {
+ *             return hash_string(((struct name_to_digit *)e)->name);
+ *     }
+ *     
+ *     // Comparison function.
+ *     static bool streq(const void *e, void *string)
+ *     {
+ *             return strcmp(((struct name_to_digit *)e)->name, string) == 0;
+ *     }
+ *     
+ *     // We let them add their own aliases, eg. --alias=v=5
+ *     static void add_alias(struct hashtable *ht, const char *alias)
+ *     {
+ *             char *eq;
+ *             struct name_to_digit *n;
+ *     
+ *             n = malloc(sizeof(*n));
+ *             n->name = strdup(alias);
+ *     
+ *             eq = strchr(n->name, '=');
+ *             if (!eq || ((n->val = atoi(eq+1)) == 0 && !strcmp(eq+1, "0")))
+ *                     errx(1, "Usage: --alias=<name>=<value>");
+ *             *eq = '\0';
+ *             hashtable_add(ht, hash_string(n->name), n);
+ *     }
+ *     
+ *     int main(int argc, char *argv[])
+ *     {
+ *             struct hashtable *ht;
+ *             unsigned int i;
+ *             unsigned long val;
+ *     
+ *             if (argc < 2)
+ *                     errx(1, "Usage: %s [--alias=<name>=<val>]... <str>...",
+ *                          argv[0]);
+ *     
+ *             // Create and populate hash table.
+ *             ht = hashtable_new(rehash, NULL);
+ *             for (i = 0; i < sizeof(map)/sizeof(map[0]); i++)
+ *                     hashtable_add(ht, hash_string(map[i].name), &map[i]);
+ *     
+ *             // Add any aliases to the hash table.
+ *             for (i = 1; i < argc; i++) {
+ *                     if (!strncmp(argv[i], "--alias=", strlen("--alias=")))
+ *                             add_alias(ht, argv[i] + strlen("--alias="));
+ *                     else
+ *                             break;
+ *             }
+ *     
+ *             // Find the other args in the hash table.
+ *             for (val = 0; i < argc; i++) {
+ *                     struct name_to_digit *n;
+ *                     n = hashtable_find(ht, hash_string(argv[i]),
+ *                                        streq, argv[i]);
+ *                     if (!n)
+ *                             errx(1, "Invalid digit name %s", argv[i]);
+ *                     // Append it to the value we are building up.
+ *                     val *= 10;
+ *                     val += n->val;
+ *             }
+ *             printf("%lu\n", val);
+ *             return 0;
+ *     }
+ *
+ * Licence: GPLv2 (or later)
+ */
+int main(int argc, char *argv[])
+{
+       if (argc != 2)
+               return 1;
+
+       if (strcmp(argv[1], "depends") == 0) {
+               return 0;
+       }
+
+       return 1;
+}
diff --git a/ccan/hashtable/hashtable.c b/ccan/hashtable/hashtable.c
new file mode 100644 (file)
index 0000000..46f85ac
--- /dev/null
@@ -0,0 +1,201 @@
+#include <ccan/hashtable/hashtable.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <assert.h>
+
+/* This means a struct hashtable takes at least 512 bytes / 1k (32/64 bits). */
+#define HASHTABLE_BASE_BITS 7
+
+/* We use 0x1 as deleted marker. */
+#define HASHTABLE_DELETED ((void *)0x1)
+
+/* Number of redundant bits in a pointer. */
+#define PTR_STEAL_BITS 3
+
+static inline uintptr_t get_extra_ptr_bits(const void *p)
+{
+       return (uintptr_t)p & (((uintptr_t)1 << PTR_STEAL_BITS)-1);
+}
+
+static inline void *get_raw_ptr(const void *p)
+{
+       return (void *)((uintptr_t)p & ~(((uintptr_t)1 << PTR_STEAL_BITS)-1));
+}
+
+static inline void *make_ptr(const void *p, uintptr_t bits)
+{
+       return (void *)((uintptr_t)p | bits);
+}
+
+struct hashtable {
+       unsigned long (*rehash)(const void *elem, void *priv);
+       void *priv;
+       unsigned int bits;
+       unsigned long elems, max;
+       void **table;
+};
+
+static inline bool ptr_is_valid(const void *e)
+{
+       return e > HASHTABLE_DELETED;
+}
+
+struct hashtable *hashtable_new(unsigned long (*rehash)(const void *elem,
+                                                       void *priv),
+                               void *priv)
+{
+       struct hashtable *ht = malloc(sizeof(struct hashtable));
+       if (ht) {
+               ht->bits = HASHTABLE_BASE_BITS;
+               ht->rehash = rehash;
+               ht->priv = priv;
+               ht->elems = 0;
+               ht->max = (1 << ht->bits) * 3 / 4;
+               ht->table = calloc(1 << ht->bits, sizeof(void *));
+               if (!ht->table) {
+                       free(ht);
+                       ht = NULL;
+               }
+       }
+       return ht;
+}
+
+void hashtable_free(struct hashtable *ht)
+{
+       free(ht->table);
+       free(ht);
+}
+
+static unsigned long hash_bucket(const struct hashtable *ht,
+                                unsigned long h, unsigned long *ptrbits)
+{
+       *ptrbits = (h >> ht->bits) & (((uintptr_t)1 << PTR_STEAL_BITS)-1);
+       return h & ((1 << ht->bits)-1);
+}
+
+/* This does not expand the hash table, that's up to caller. */
+static void ht_add(struct hashtable *ht, const void *new, unsigned long h)
+{
+       unsigned long i, h2;
+
+       i = hash_bucket(ht, h, &h2);
+
+       while (ptr_is_valid(ht->table[i])) {
+               i = (i + 1) & ((1 << ht->bits)-1);
+       }
+       ht->table[i] = make_ptr(new, h2);
+}
+
+static bool double_table(struct hashtable *ht)
+{
+       unsigned int i;
+       size_t oldnum = (size_t)1 << ht->bits;
+       void **oldtable, *e;
+
+       oldtable = ht->table;
+       ht->table = calloc(1 << (ht->bits+1), sizeof(void *));
+       if (!ht->table) {
+               ht->table = oldtable;
+               return false;
+       }
+       ht->bits++;
+       ht->max *= 2;
+
+       for (i = 0; i < oldnum; i++) {
+               if (ptr_is_valid(e = oldtable[i])) {
+                       e = get_raw_ptr(e);
+                       ht_add(ht, e, ht->rehash(e, ht->priv));
+               }
+       }
+       free(oldtable);
+       return true;
+}
+
+bool hashtable_add(struct hashtable *ht, unsigned long hash, const void *p)
+{
+       if (ht->elems+1 > ht->max && !double_table(ht))
+               return false;
+       ht->elems++;
+       assert(p);
+       assert(((uintptr_t)p & (((uintptr_t)1 << PTR_STEAL_BITS)-1)) == 0);
+       ht_add(ht, p, hash);
+       return true;
+}
+
+/* Find bucket for an element. */
+static size_t ht_find(struct hashtable *ht,
+                     unsigned long hash,
+                     bool (*cmp)(const void *htelem, void *cmpdata),
+                     void *cmpdata)
+{
+       void *e;
+       unsigned long i, h2;
+
+       i = hash_bucket(ht, hash, &h2);
+
+       while ((e = ht->table[i]) != NULL) {
+               if (e != HASHTABLE_DELETED
+                   && h2 == get_extra_ptr_bits(e)
+                   && cmp(get_raw_ptr(ht->table[i]), cmpdata))
+                       return i;
+               i = (i + 1) & ((1 << ht->bits)-1);
+       }
+       return (size_t)-1;
+}
+
+
+/* If every one of the following buckets are DELETED (up to the next unused
+   one), we can actually mark them all unused. */
+static void delete_run(struct hashtable *ht, unsigned int num)
+{
+       unsigned int i, last = num + 1;
+
+       while (ht->table[last & ((1 << ht->bits)-1)]) {
+               if (ptr_is_valid(ht->table[last & ((1 << ht->bits)-1)]))
+                       return;
+               last++;
+       }
+       for (i = num; i < last; i++)
+               ht->table[i & ((1 << ht->bits)-1)] = NULL;
+}
+
+void *hashtable_find(struct hashtable *ht,
+                    unsigned long hash,
+                    bool (*cmp)(const void *htelem, void *cmpdata),
+                    void *cmpdata)
+{
+       size_t i = ht_find(ht, hash, cmp, cmpdata);
+       if (i != (size_t)-1)
+               return get_raw_ptr(ht->table[i]);
+       return NULL;
+}
+
+static bool ptr_cmp(const void *htelem, void *cmpdata)
+{
+       return htelem == cmpdata;
+}
+
+bool hashtable_del(struct hashtable *ht, unsigned long hash, const void *p)
+{
+       size_t i = ht_find(ht, hash, ptr_cmp, (void *)p);
+       if (i == (size_t)-1)
+               return false;
+
+       ht->elems--;
+       ht->table[i] = HASHTABLE_DELETED;
+       delete_run(ht, i);
+       return true;
+}
+       
+       
+void hashtable_traverse(struct hashtable *ht, bool (*cb)(void *p, void *cbarg),
+                       void *cbarg)
+{
+       size_t i;
+
+       for (i = 0; i < (1 << ht->bits); i++) {
+               if (ptr_is_valid(ht->table[i]))
+                       if (cb(get_raw_ptr(ht->table[i]), cbarg))
+                               return;
+       }
+}
diff --git a/ccan/hashtable/hashtable.h b/ccan/hashtable/hashtable.h
new file mode 100644 (file)
index 0000000..269e942
--- /dev/null
@@ -0,0 +1,74 @@
+#ifndef CCAN_HASHTABLE_H
+#define CCAN_HASHTABLE_H
+#include "config.h"
+#include <stdbool.h>
+
+struct hashtable;
+
+/**
+ * hashtable_new - allocate a hash tree.
+ * @rehash: hash function to use for rehashing.
+ * @priv: private argument to @rehash function.
+ */
+struct hashtable *hashtable_new(unsigned long (*rehash)(const void *elem,
+                                                       void *priv),
+                               void *priv);
+
+/**
+ * hashtable_free - dellocate a hash tree.
+ *
+ * This doesn't do anything to any pointers left in it.
+ */
+void hashtable_free(struct hashtable *);
+
+/**
+ * hashtable_find - look for an object in a hash tree.
+ * @ht: the hashtable
+ * @hash: the hash value of the object you are seeking
+ * @cmp: a comparison function: returns true if the object is found
+ * @cmpdata: the data to hand as second arg to @cmp
+ *
+ * Note that you can do all the work inside the cmp function if you want to:
+ * hashtable_find will return @htelem if @cmp returns true.
+ */
+void *hashtable_find(struct hashtable *ht,
+                    unsigned long hash,
+                    bool (*cmp)(const void *htelem, void *cmpdata),
+                    void *cmpdata);
+
+/**
+ * hashtable_add - add an (aligned) pointer into a hash tree.
+ * @ht: the hashtable
+ * @hash: the hash value of the object
+ * @p: the non-NULL pointer, usually from malloc or equiv.
+ *
+ * Note that this implementation (ab)uses the lower bits of the pointer, so
+ * it will abort if the pointer is not aligned to 8 bytes.
+ *
+ * Also note that this can only fail due to allocation failure.  Otherwise, it
+ * returns true.
+ */
+bool hashtable_add(struct hashtable *ht, unsigned long hash, const void *p);
+
+/**
+ * hashtable_del - remove a pointer from a hash tree
+ * @ht: the hashtable
+ * @hash: the hash value of the object
+ * @p: the pointer
+ *
+ * Returns true if the pointer was found (and deleted).
+ */
+bool hashtable_del(struct hashtable *ht, unsigned long hash, const void *p);
+
+/**
+ * hashtable_traverse - call a function on every pointer in hash tree
+ * @ht: the hashtable
+ * @cb: the callback: returns true to abort traversal.
+ * @cbarg: the argument to the callback
+ *
+ * Note that your traversal callback may delete any entry (it won't crash),
+ * but it may make the traverse unreliable.
+ */
+void hashtable_traverse(struct hashtable *ht, bool (*cb)(void *p, void *cbarg),
+                       void *cbarg);
+#endif /* CCAN_HASHTABLE_H */
diff --git a/ccan/hashtable/test/run.c b/ccan/hashtable/test/run.c
new file mode 100644 (file)
index 0000000..e072cae
--- /dev/null
@@ -0,0 +1,248 @@
+#include <ccan/hashtable/hashtable.h>
+#include <ccan/hashtable/hashtable.c>
+#include <ccan/tap/tap.h>
+#include <stdbool.h>
+#include <string.h>
+
+#define NUM_VALS (1 << HASHTABLE_BASE_BITS)
+
+/* We use the number divided by two as the hash (for lots of
+   collisions), plus set all the higher bits so we can detect if they
+   don't get masked out. */
+static unsigned long hash(const void *elem, void *unused)
+{
+       unsigned long h = *(uint64_t *)elem / 2;
+       h |= -1UL << HASHTABLE_BASE_BITS;
+       return h;
+}
+
+static bool objcmp(const void *htelem, void *cmpdata)
+{
+       return *(uint64_t *)htelem == *(uint64_t *)cmpdata;
+}
+
+static void add_vals(struct hashtable *ht,
+                    const uint64_t val[], unsigned int num)
+{
+       uint64_t i;
+
+       for (i = 0; i < num; i++) {
+               if (hashtable_find(ht, hash(&i, NULL), objcmp, &i)) {
+                       fail("%llu already in hash", (long long)i);
+                       return;
+               }
+               hashtable_add(ht, hash(&val[i], NULL), &val[i]);
+               if (hashtable_find(ht, hash(&i, NULL), objcmp, &i) != &val[i]) {
+                       fail("%llu not added to hash", (long long)i);
+                       return;
+               }
+       }
+       pass("Added %llu numbers to hash", (long long)i);
+}
+
+static void refill_vals(struct hashtable *ht,
+                       const uint64_t val[], unsigned int num)
+{
+       uint64_t i;
+
+       for (i = 0; i < num; i++) {
+               if (hashtable_find(ht, hash(&i, NULL), objcmp, &i))
+                       continue;
+               hashtable_add(ht, hash(&val[i], NULL), &val[i]);
+       }
+}
+
+static void find_vals(struct hashtable *ht,
+                     const uint64_t val[], unsigned int num)
+{
+       uint64_t i;
+
+       for (i = 0; i < num; i++) {
+               if (hashtable_find(ht, hash(&i, NULL), objcmp, &i) != &val[i]) {
+                       fail("%llu not found in hash", (long long)i);
+                       return;
+               }
+       }
+       pass("Found %llu numbers in hash", (long long)i);
+}
+
+static void del_vals(struct hashtable *ht,
+                    const uint64_t val[], unsigned int num)
+{
+       uint64_t i;
+
+       for (i = 0; i < num; i++) {
+               if (!hashtable_del(ht, hash(&val[i], NULL), &val[i])) {
+                       fail("%llu not deleted from hash", (long long)i);
+                       return;
+               }
+       }
+       pass("Deleted %llu numbers in hash", (long long)i);
+}
+
+struct travarg {
+       unsigned int count;
+       struct hashtable *ht;
+       char touched[NUM_VALS];
+       uint64_t *val;
+};
+
+static bool count(void *p, void *cbarg)
+{
+       struct travarg *travarg = cbarg;
+       travarg->count++;
+       travarg->touched[*(uint64_t *)p]++;
+       return false;
+}
+
+static bool delete_self(void *p, void *cbarg)
+{
+       struct travarg *travarg = cbarg;
+       uint64_t val = *(uint64_t *)p;
+
+       travarg->count++;
+       travarg->touched[val]++;
+       return !hashtable_del(travarg->ht, hash(p, NULL), p);
+}
+
+static bool delete_next(void *p, void *cbarg)
+{
+       struct travarg *travarg = cbarg;
+       uint64_t val = *(uint64_t *)p;
+       uint64_t *next = &travarg->val[(val + 1) % NUM_VALS];
+
+       travarg->count++;
+       travarg->touched[val]++;
+       return !hashtable_del(travarg->ht, hash(next, NULL), next);
+}
+
+static bool delete_prev(void *p, void *cbarg)
+{
+       struct travarg *travarg = cbarg;
+       uint64_t val = *(uint64_t *)p;
+       uint64_t *prev = &travarg->val[(val - 1) % NUM_VALS];
+
+       travarg->count++;
+       travarg->touched[val]++;
+       return !hashtable_del(travarg->ht, hash(prev, NULL), prev);
+}
+
+static bool stop_halfway(void *p, void *cbarg)
+{
+       struct travarg *travarg = cbarg;
+       travarg->count++;
+       travarg->touched[*(uint64_t *)p]++;
+
+       return (travarg->count == NUM_VALS / 2);
+}
+
+static void check_all_touched_once(struct travarg *travarg)
+{
+       unsigned int i;
+
+       for (i = 0; i < NUM_VALS; i++) {
+               if (travarg->touched[i] != 1) {
+                       fail("Value %u touched %u times",
+                            i, travarg->touched[i]);
+                       return;
+               }
+       }
+       pass("All values touched once");
+}
+
+static void check_only_touched_once(struct travarg *travarg)
+{
+       unsigned int i;
+
+       for (i = 0; i < NUM_VALS; i++) {
+               if (travarg->touched[i] > 1) {
+                       fail("Value %u touched multiple (%u) times",
+                            i, travarg->touched[i]);
+                       return;
+               }
+       }
+       pass("No value touched twice");
+}
+
+int main(int argc, char *argv[])
+{
+       unsigned int i;
+       struct hashtable *ht;
+       uint64_t val[NUM_VALS];
+       struct travarg travarg;
+       uint64_t dne;
+
+       plan_tests(20);
+       for (i = 0; i < NUM_VALS; i++)
+               val[i] = i;
+       dne = i;
+
+       ht = hashtable_new(hash, NULL);
+       ok1(ht->max < (1 << ht->bits));
+       ok1(ht->bits == HASHTABLE_BASE_BITS);
+
+       /* We cannot find an entry which doesn't exist. */
+       ok1(!hashtable_find(ht, hash(&dne, NULL), objcmp, &dne));
+
+       /* Fill it, it should increase in size (once). */
+       add_vals(ht, val, NUM_VALS);
+       ok1(ht->bits == HASHTABLE_BASE_BITS + 1);
+       ok1(ht->max < (1 << ht->bits));
+
+       /* Find all. */
+       find_vals(ht, val, NUM_VALS);
+       ok1(!hashtable_find(ht, hash(&dne, NULL), objcmp, &dne));
+
+       /* Delete all. */
+       del_vals(ht, val, NUM_VALS);
+       ok1(!hashtable_find(ht, hash(&val[0], NULL), objcmp, &val[0]));
+
+       /* Refill. */
+       refill_vals(ht, val, NUM_VALS);
+
+       /* Traverse tests. */
+       travarg.ht = ht;
+       travarg.val = val;
+       memset(travarg.touched, 0, sizeof(travarg.touched));
+       travarg.count = 0;
+
+       /* Traverse. */
+       hashtable_traverse(ht, count, &travarg);
+       ok1(travarg.count == NUM_VALS);
+       check_all_touched_once(&travarg);
+
+       memset(travarg.touched, 0, sizeof(travarg.touched));
+       travarg.count = 0;
+       hashtable_traverse(ht, stop_halfway, &travarg);
+       ok1(travarg.count == NUM_VALS / 2);
+       check_only_touched_once(&travarg);
+
+       memset(travarg.touched, 0, sizeof(travarg.touched));
+       travarg.count = 0;
+       i = 0;
+       /* Delete until we make no more progress. */
+       for (;;) {
+               hashtable_traverse(ht, delete_self, &travarg);
+               if (travarg.count == i || travarg.count > NUM_VALS)
+                       break;
+               i = travarg.count;
+       }
+       ok1(travarg.count == NUM_VALS);
+       check_all_touched_once(&travarg);
+
+       memset(travarg.touched, 0, sizeof(travarg.touched));
+       travarg.count = 0;
+       refill_vals(ht, val, NUM_VALS);
+       hashtable_traverse(ht, delete_next, &travarg);
+       ok1(travarg.count <= NUM_VALS);
+       check_only_touched_once(&travarg);
+
+       memset(travarg.touched, 0, sizeof(travarg.touched));
+       travarg.count = 0;
+       refill_vals(ht, val, NUM_VALS);
+       hashtable_traverse(ht, delete_prev, &travarg);
+       ok1(travarg.count <= NUM_VALS);
+       check_only_touched_once(&travarg);
+
+       return exit_status();
+}