]> git.ozlabs.org Git - ccan/commitdiff
htable: handle v. unlikely case where entries look deleted/empty.
authorRusty Russell <rusty@rustcorp.com.au>
Thu, 9 Jun 2022 03:12:31 +0000 (12:42 +0930)
committerRusty Russell <rusty@rustcorp.com.au>
Thu, 9 Jun 2022 04:12:44 +0000 (13:42 +0930)
If the hash function doesn't set any bits we use, and the common
bits are all zero the slot will look empty (or, just the lower bit is
set: the slow looks deleted).

However, each bucket is distinct since there are no duplicates, so
at worse there can be two "looks invalid but actually is valid"
buckets.  Keep them at the end.

Lookup suffers in raw tools/speed though:

-Lookup after half-change (match): 53-61(54.8+/-2.3) ns
+Lookup after half-change (match): 61-113(83.3+/-17) ns
-Lookup after churn & spread (match): 54-90(59.9+/-11) ns
+Lookup after churn & spread (match): 70-108(85.2+/-14) ns

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
ccan/htable/htable.c
ccan/htable/test/run-clash.c [new file with mode: 0644]
ccan/htable/test/run-debug.c
ccan/htable/test/run.c
ccan/htable/tools/speed.c

index cffd0619d338d8146365abd3f10e1b308ef4c4d4..0371e81d0fac4837c221fcb7cb57fff5a12b8fdf 100644 (file)
@@ -56,9 +56,68 @@ static inline uintptr_t make_hval(const struct htable *ht,
        return ((uintptr_t)p & ~ht->common_mask) | bits;
 }
 
-static inline bool entry_is_valid(uintptr_t e)
+static inline uintptr_t *actually_valid_pair(const struct htable *ht)
 {
-       return e > HTABLE_DELETED;
+       return ht->table + ((size_t)1 << ht->bits);
+}
+
+/* We have have two entries which look deleted, but we remember
+ * they are not! */
+static inline bool entry_actually_valid(const struct htable *ht, size_t off)
+{
+       const uintptr_t *valid = actually_valid_pair(ht);
+       /* Empty table looks like this! */
+       if (valid == &ht->common_bits + 1)
+               return false;
+       return valid[0] == off || valid[1] == off;
+}
+
+/* Initialize the "actually valid" pair. */
+static inline void init_actually_valid(struct htable *ht)
+{
+       uintptr_t *valid = actually_valid_pair(ht);
+       valid[0] = valid[1] = ((size_t)1 << ht->bits);
+}
+
+/* Add to the "actually valid" pair: there can only ever be two! */
+static COLD void add_actually_valid(struct htable *ht, size_t off)
+{
+       uintptr_t *valid = actually_valid_pair(ht);
+       if (valid[0] == ((size_t)1 << ht->bits))
+               valid[0] = off;
+       else {
+               assert(valid[1] == ((size_t)1 << ht->bits));
+               valid[1] = off;
+       }
+}
+
+static COLD void del_actually_valid(struct htable *ht, size_t off)
+{
+       uintptr_t *validpair = actually_valid_pair(ht);
+       if (validpair[0] == off)
+               validpair[0] = ((size_t)1 << ht->bits);
+       else {
+               assert(validpair[1] == off);
+               validpair[1] = ((size_t)1 << ht->bits);
+       }
+}
+
+/* If this entry looks invalid, check entry_actually_valid! */
+static inline bool entry_looks_invalid(const struct htable *ht, size_t off)
+{
+       return ht->table[off] <= HTABLE_DELETED;
+}
+
+static inline bool entry_is_valid(const struct htable *ht, size_t off)
+{
+       if (!entry_looks_invalid(ht, off))
+               return true;
+       return entry_actually_valid(ht, off);
+}
+
+static inline bool entry_is_deleted(const struct htable *ht, size_t off)
+{
+       return ht->table[off] == HTABLE_DELETED && !entry_actually_valid(ht, off);
 }
 
 static inline uintptr_t ht_perfect_mask(const struct htable *ht)
@@ -96,6 +155,12 @@ static inline size_t ht_max_with_deleted(const struct htable *ht)
        return ((size_t)9 << ht->bits) / 10;
 }
 
+/* Includes the two trailing "not-deleted" entries */
+static size_t htable_alloc_size(size_t bits)
+{
+       return (sizeof(size_t) << bits) + 2 * sizeof(size_t);
+}
+
 bool htable_init_sized(struct htable *ht,
                       size_t (*rehash)(const void *, void *),
                       void *priv, size_t expect)
@@ -108,11 +173,12 @@ bool htable_init_sized(struct htable *ht,
                        break;
        }
 
-       ht->table = htable_alloc(ht, sizeof(size_t) << ht->bits);
+       ht->table = htable_alloc(ht, htable_alloc_size(ht->bits));
        if (!ht->table) {
                ht->table = &ht->common_bits;
                return false;
        }
+       init_actually_valid(ht);
        (void)htable_debug(ht, HTABLE_LOC);
        return true;
 }
@@ -126,14 +192,14 @@ void htable_clear(struct htable *ht)
 
 bool htable_copy_(struct htable *dst, const struct htable *src)
 {
-       uintptr_t *htable = htable_alloc(dst, sizeof(size_t) << src->bits);
+       uintptr_t *htable = htable_alloc(dst, htable_alloc_size(src->bits));
 
        if (!htable)
                return false;
 
        *dst = *src;
        dst->table = htable;
-       memcpy(dst->table, src->table, sizeof(size_t) << src->bits);
+       memcpy(dst->table, src->table, htable_alloc_size(src->bits));
        return true;
 }
 
@@ -147,8 +213,8 @@ static void *htable_val(const struct htable *ht,
 {
        uintptr_t h2 = get_hash_ptr_bits(ht, hash) | perfect;
 
-       while (ht->table[i->off]) {
-               if (ht->table[i->off] != HTABLE_DELETED) {
+       while (ht->table[i->off] || entry_actually_valid(ht, i->off)) {
+               if (!entry_is_deleted(ht, i->off)) {
                        if (get_extra_ptr_bits(ht, ht->table[i->off]) == h2)
                                return get_raw_ptr(ht, ht->table[i->off]);
                }
@@ -175,7 +241,7 @@ void *htable_nextval_(const struct htable *ht,
 void *htable_first_(const struct htable *ht, struct htable_iter *i)
 {
        for (i->off = 0; i->off < (size_t)1 << ht->bits; i->off++) {
-               if (entry_is_valid(ht->table[i->off]))
+               if (entry_is_valid(ht, i->off))
                        return get_raw_ptr(ht, ht->table[i->off]);
        }
        return NULL;
@@ -184,7 +250,7 @@ void *htable_first_(const struct htable *ht, struct htable_iter *i)
 void *htable_next_(const struct htable *ht, struct htable_iter *i)
 {
        for (i->off++; i->off < (size_t)1 << ht->bits; i->off++) {
-               if (entry_is_valid(ht->table[i->off]))
+               if (entry_is_valid(ht, i->off))
                        return get_raw_ptr(ht, ht->table[i->off]);
        }
        return NULL;
@@ -195,8 +261,8 @@ void *htable_prev_(const struct htable *ht, struct htable_iter *i)
        for (;;) {
                if (!i->off)
                        return NULL;
-               i->off --;
-               if (entry_is_valid(ht->table[i->off]))
+               i->off--;
+               if (entry_is_valid(ht, i->off))
                        return get_raw_ptr(ht, ht->table[i->off]);
        }
 }
@@ -209,26 +275,29 @@ static void ht_add(struct htable *ht, const void *new, size_t h)
 
        i = hash_bucket(ht, h);
 
-       while (entry_is_valid(ht->table[i])) {
+       while (entry_is_valid(ht, i)) {
                perfect = 0;
                i = (i + 1) & ((1 << ht->bits)-1);
        }
        ht->table[i] = make_hval(ht, new, get_hash_ptr_bits(ht, h)|perfect);
+
+       /* If it looks invalid, add it to exceptions */
+       if (ht->table[i] <= HTABLE_DELETED)
+               add_actually_valid(ht, i);
 }
 
 static COLD bool double_table(struct htable *ht)
 {
-       unsigned int i;
-       size_t oldnum = (size_t)1 << ht->bits;
-       uintptr_t *oldtable, e;
+       size_t i;
+       struct htable oldht = *ht;
 
-       oldtable = ht->table;
-       ht->table = htable_alloc(ht, sizeof(size_t) << (ht->bits+1));
+       ht->table = htable_alloc(ht, htable_alloc_size(ht->bits+1));
        if (!ht->table) {
-               ht->table = oldtable;
+               ht->table = oldht.table;
                return false;
        }
        ht->bits++;
+       init_actually_valid(ht);
 
        /* If we lost our "perfect bit", get it back now. */
        if (ht->perfect_bitnum == NO_PERFECT_BIT && ht->common_mask) {
@@ -240,14 +309,15 @@ static COLD bool double_table(struct htable *ht)
                }
        }
 
-       if (oldtable != &ht->common_bits) {
-               for (i = 0; i < oldnum; i++) {
-                       if (entry_is_valid(e = oldtable[i])) {
-                               void *p = get_raw_ptr(ht, e);
+       if (oldht.table != &ht->common_bits) {
+               for (i = 0; i < (size_t)1 << oldht.bits; i++) {
+                       if (entry_is_valid(&oldht, i)) {
+                               void *p = get_raw_ptr(&oldht, oldht.table[i]);
                                ht_add(ht, p, ht->rehash(p, ht->priv));
                        }
                }
-               htable_free(ht, oldtable);
+               /* Pass ht here to callback: oldht is an internal figment */
+               htable_free(ht, oldht.table);
        }
        ht->deleted = 0;
 
@@ -259,20 +329,33 @@ static COLD void rehash_table(struct htable *ht)
 {
        size_t start, i;
        uintptr_t e, perfect = ht_perfect_mask(ht);
+       uintptr_t *validpair = actually_valid_pair(ht);
 
        /* Beware wrap cases: we need to start from first empty bucket. */
        for (start = 0; ht->table[start]; start++);
 
        for (i = 0; i < (size_t)1 << ht->bits; i++) {
                size_t h = (i + start) & ((1 << ht->bits)-1);
+               uintptr_t *actually = NULL;
                e = ht->table[h];
-               if (!e)
-                       continue;
-               if (e == HTABLE_DELETED)
-                       ht->table[h] = 0;
-               else if (!(e & perfect)) {
+               if (e <= HTABLE_DELETED) {
+                       /* If it's actually valid, remember in case we move it! */
+                       if (validpair[0] == h) {
+                               actually = &validpair[0];
+                       } else if (validpair[1] == h) {
+                               actually = &validpair[1];
+                       } else {
+                               ht->table[h] = 0;
+                               continue;
+                       }
+               }
+
+               if (!(e & perfect)) {
                        void *p = get_raw_ptr(ht, e);
                        ht->table[h] = 0;
+                       /* Clear actuallyvalid, let ht_add refill */
+                       if (actually)
+                               *actually = ((size_t)1 << ht->bits);
                        ht_add(ht, p, ht->rehash(p, ht->priv));
                }
        }
@@ -287,16 +370,7 @@ static COLD void update_common(struct htable *ht, const void *p)
        uintptr_t maskdiff, bitsdiff;
 
        if (ht->elems == 0) {
-               /* Always reveal one bit of the pointer in the bucket,
-                * so it's not zero or HTABLE_DELETED (1), even if
-                * hash happens to be 0.  Assumes (void *)1 is not a
-                * valid pointer. */
-               for (i = sizeof(uintptr_t)*CHAR_BIT - 1; i > 0; i--) {
-                       if ((uintptr_t)p & ((uintptr_t)1 << i))
-                               break;
-               }
-
-               ht->common_mask = ~((uintptr_t)1 << i);
+               ht->common_mask = -1;
                ht->common_bits = ((uintptr_t)p & ht->common_mask);
                ht->perfect_bitnum = 0;
                (void)htable_debug(ht, HTABLE_LOC);
@@ -310,12 +384,16 @@ static COLD void update_common(struct htable *ht, const void *p)
        bitsdiff = ht->common_bits & maskdiff;
 
        for (i = 0; i < (size_t)1 << ht->bits; i++) {
-               if (!entry_is_valid(ht->table[i]))
+               if (!entry_is_valid(ht, i))
                        continue;
                /* Clear the bits no longer in the mask, set them as
                 * expected. */
                ht->table[i] &= ~maskdiff;
                ht->table[i] |= bitsdiff;
+
+               /* Make sure it's not newly falsely invalid */
+               if (ht->table[i] <= HTABLE_DELETED && !entry_actually_valid(ht, i))
+                       add_actually_valid(ht, i);
        }
 
        /* Take away those bits from our mask, bits and perfect bit. */
@@ -358,7 +436,9 @@ bool htable_del_(struct htable *ht, size_t h, const void *p)
 void htable_delval_(struct htable *ht, struct htable_iter *i)
 {
        assert(i->off < (size_t)1 << ht->bits);
-       assert(entry_is_valid(ht->table[i->off]));
+
+       if (entry_looks_invalid(ht, i->off))
+               del_actually_valid(ht, i->off);
 
        ht->elems--;
        ht->table[i->off] = HTABLE_DELETED;
@@ -384,6 +464,7 @@ struct htable *htable_check(const struct htable *ht, const char *abortstr)
        void *p;
        struct htable_iter i;
        size_t n = 0;
+       const uintptr_t *validpair = actually_valid_pair(ht);
 
        /* Use non-DEBUG versions here, to avoid infinite recursion with
         * CCAN_HTABLE_DEBUG! */
@@ -426,5 +507,35 @@ struct htable *htable_check(const struct htable *ht, const char *abortstr)
                return NULL;
        }
 
+       /* Check validpair does actually override invalid-looking entries! */
+       if (ht->table != &ht->common_bits) {
+               size_t i;
+               for (i = 0; i < 2; i++) {
+                       if (validpair[i] == ((size_t)1 << ht->bits))
+                               continue;
+                       if (validpair[i] > ((size_t)1 << ht->bits)) {
+                               if (abortstr) {
+                                       fprintf(stderr,
+                                               "%s: validpair[%zu] points at %zu"
+                                               " which is out of bounds\n",
+                                               abortstr, i, validpair[i]);
+                                       abort();
+                               }
+                               return NULL;
+                       }
+                       if (entry_looks_invalid(ht, validpair[i]))
+                               continue;
+
+                       if (abortstr) {
+                               fprintf(stderr,
+                                       "%s: validpair[%zu] points at %zu"
+                                       " which seems valid\n",
+                                       abortstr, i, validpair[i]);
+                               abort();
+                       }
+                       return NULL;
+               }
+       }
+
        return (struct htable *)ht;
 }
diff --git a/ccan/htable/test/run-clash.c b/ccan/htable/test/run-clash.c
new file mode 100644 (file)
index 0000000..0328c35
--- /dev/null
@@ -0,0 +1,30 @@
+#include <ccan/htable/htable.h>
+#include <ccan/htable/htable.c>
+#include <ccan/tap/tap.h>
+#include <stdbool.h>
+#include <string.h>
+
+/* Clashy hash */
+static size_t hash(const void *elem, void *unused UNNEEDED)
+{
+       return 0;
+}
+
+int main(void)
+{
+       struct htable ht;
+
+       plan_tests(254 * 253);
+       /* We try to get two elements which clash */
+       for (size_t i = 2; i < 256; i++) {
+               for (size_t j = 2; j < 256; j++) {
+                       if (i == j)
+                               continue;
+                       htable_init(&ht, hash, NULL);
+                       htable_add(&ht, hash((void *)i, NULL), (void *)i);
+                       htable_add(&ht, hash((void *)j, NULL), (void *)j);
+                       ok1(htable_check(&ht, "test"));
+               }
+       }
+       return exit_status();
+}
index b9f48ee70aa96afc0dec95d98d8240b2659bba17..399910354da21675f8767525a763d7bc44abfb14 100644 (file)
@@ -107,7 +107,7 @@ static bool check_mask(struct htable *ht, uint64_t val[], unsigned num)
 
 int main(void)
 {
-       unsigned int i, weight;
+       unsigned int i;
        uintptr_t perfect_bit;
        struct htable ht;
        uint64_t val[NUM_VALS];
@@ -131,14 +131,7 @@ int main(void)
        add_vals(&ht, val, 0, 1);
        ok1(ht.bits == 1);
        ok1(ht_max(&ht) == 1);
-       weight = 0;
-       for (i = 0; i < sizeof(ht.common_mask) * CHAR_BIT; i++) {
-               if (ht.common_mask & ((uintptr_t)1 << i)) {
-                       weight++;
-               }
-       }
-       /* Only one bit should be clear. */
-       ok1(weight == i-1);
+       ok1(ht.common_mask == -1);
 
        /* Mask should be set. */
        ok1(check_mask(&ht, val, 1));
index ada85f95a93d803d3db678e292bb2e33a23666cc..27007a41ac5f94d452ac4a91121b5a122d7f87fe 100644 (file)
@@ -97,7 +97,7 @@ static bool check_mask(struct htable *ht, uint64_t val[], unsigned num)
 
 int main(void)
 {
-       unsigned int i, weight;
+       unsigned int i;
        uintptr_t perfect_bit;
        struct htable ht;
        uint64_t val[NUM_VALS];
@@ -122,14 +122,7 @@ int main(void)
        add_vals(&ht, val, 0, 1);
        ok1(ht.bits == 1);
        ok1(ht_max(&ht) == 1);
-       weight = 0;
-       for (i = 0; i < sizeof(ht.common_mask) * CHAR_BIT; i++) {
-               if (ht.common_mask & ((uintptr_t)1 << i)) {
-                       weight++;
-               }
-       }
-       /* Only one bit should be clear. */
-       ok1(weight == i-1);
+       ok1(ht.common_mask == -1);
 
        /* Mask should be set. */
        ok1(check_mask(&ht, val, 1));
index e185b6f69eb74f3435789d82abe144eb32d8d1ca..3fd1e8926fce0d74fd92ece44a44edf62b50305d 100644 (file)
@@ -70,7 +70,7 @@ static size_t perfect(const struct htable *ht)
        size_t i, placed_perfect = 0;
 
        for (i = 0; i < ((size_t)1 << ht->bits); i++) {
-               if (!entry_is_valid(ht->table[i]))
+               if (!entry_is_valid(ht, i))
                        continue;
                if (hash_bucket(ht, ht->rehash(get_raw_ptr(ht, ht->table[i]),
                                               ht->priv)) == i) {
@@ -87,7 +87,7 @@ static size_t count_deleted(const struct htable *ht)
        size_t i, delete_markers = 0;
 
        for (i = 0; i < ((size_t)1 << ht->bits); i++) {
-               if (ht->table[i] == HTABLE_DELETED)
+               if (entry_is_deleted(ht, i))
                        delete_markers++;
        }
        return delete_markers;