From 6ceee26828dcb8bf1d607a8221f0d9f8261d448b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 8 Nov 2010 16:23:17 +1030 Subject: [PATCH] htable: rehash to clear delete markers Delete markers build up over time, even if we try to clean as we go. So just garbage collect them when there are too many. Before: rusty@vivaldi:~/devel/cvs/ccan/ccan/htable/tools (htable)$ ./speed 50000000 Initial insert: 467 ns Details: hash size 134217728, mask bits 5, perfect 81% Initial lookup (match): 160 ns Initial lookup (miss): 169 ns Initial lookup (random): 328 ns Initial delete all: 165 ns Details: rehashes 50000000 Initial re-inserting: 317 ns Deleting first half: 86 ns Details: rehashes 25000000, delete markers 25000000 Adding (a different) half: 89 ns Details: delete markers 18894324, perfect 76% Lookup after half-change (match): 174 ns Lookup after half-change (miss): 203 ns Details: initial churn Churning second time: 816 ns Churning third time: 615 ns Churning fourth time: 621 ns Churning fifth time: 846 ns Details: reinserting with spread Details: delete markers 11078719, perfect 74% Details: worst run 48 (4 deleted) Lookup after churn & spread (match): 191 ns Lookup after churn & spread (miss): 208 ns Lookup after churn & spread (random): 374 ns Deleting half after churn & spread: 102 ns Adding (a different) half after churn & spread: 103 ns Details: delete markers 27442234, perfect 73% After: Initial insert: 462 ns Details: hash size 134217728, mask bits 5, perfect 81% Initial lookup (match): 161 ns Initial lookup (miss): 168 ns Initial lookup (random): 326 ns Initial delete all: 164 ns Details: rehashes 50000000 Initial re-inserting: 167 ns Deleting first half: 86 ns Details: rehashes 25000000, delete markers 25000000 Adding (a different) half: 217 ns Details: delete markers 0, perfect 81% Lookup after half-change (match): 169 ns Lookup after half-change (miss): 180 ns Details: initial churn Churning second time: 593 ns Churning third time: 611 ns Churning fourth time: 619 ns Churning fifth time: 622 ns Details: reinserting with spread Details: delete markers 13800468, perfect 73% Details: worst run 48 (4 deleted) Lookup after churn & spread (match): 192 ns Lookup after churn & spread (miss): 211 ns Lookup after churn & spread (random): 373 ns Deleting half after churn & spread: 103 ns Adding (a different) half after churn & spread: 102 ns Details: delete markers 29539689, perfect 72% --- ccan/htable/htable.c | 56 +++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/ccan/htable/htable.c b/ccan/htable/htable.c index 938d410a..48a73407 100644 --- a/ccan/htable/htable.c +++ b/ccan/htable/htable.c @@ -15,7 +15,7 @@ struct htable { size_t (*rehash)(const void *elem, void *priv); void *priv; unsigned int bits; - size_t elems, max; + size_t elems, deleted, max, max_with_deleted; /* These are the bits which are the same in all pointers. */ uintptr_t common_mask, common_bits; uintptr_t *table; @@ -62,7 +62,9 @@ struct htable *htable_new(size_t (*rehash)(const void *elem, void *priv), ht->rehash = rehash; ht->priv = priv; ht->elems = 0; - ht->max = (1 << ht->bits) * 3 / 4; + ht->deleted = 0; + ht->max = (1 << ht->bits) * 2 / 3; + ht->max_with_deleted = (1 << ht->bits) * 4 / 5; /* This guarantees we enter update_common first add. */ ht->common_mask = -1; ht->common_bits = 0; @@ -150,7 +152,7 @@ static COLD_ATTRIBUTE bool double_table(struct htable *ht) { unsigned int i; size_t oldnum = (size_t)1 << ht->bits; - size_t *oldtable, e; + uintptr_t *oldtable, e; oldtable = ht->table; ht->table = calloc(1 << (ht->bits+1), sizeof(size_t)); @@ -160,6 +162,7 @@ static COLD_ATTRIBUTE bool double_table(struct htable *ht) } ht->bits++; ht->max *= 2; + ht->max_with_deleted *= 2; for (i = 0; i < oldnum; i++) { if (entry_is_valid(e = oldtable[i])) { @@ -167,10 +170,33 @@ static COLD_ATTRIBUTE bool double_table(struct htable *ht) ht_add(ht, p, ht->rehash(p, ht->priv)); } } + ht->deleted = 0; free(oldtable); return true; } +static COLD_ATTRIBUTE void rehash_table(struct htable *ht) +{ + size_t start, i; + uintptr_t e; + + /* Beware wrap cases: we need to start from first empty bucket. */ + for (start = 0; ht->table[start]; start++); + + for (i = 0; i < (size_t)1 << ht->bits; i++) { + size_t h = (i + start) & ((1 << ht->bits)-1); + e = ht->table[h]; + if (!e) + continue; + ht->table[h] = 0; + if (e != HTABLE_DELETED) { + void *p = get_raw_ptr(ht, e); + ht_add(ht, p, ht->rehash(p, ht->priv)); + } + } + ht->deleted = 0; +} + /* We stole some bits, now we need to put them back... */ static COLD_ATTRIBUTE void update_common(struct htable *ht, const void *p) { @@ -207,6 +233,8 @@ bool htable_add(struct htable *ht, size_t hash, const void *p) { if (ht->elems+1 > ht->max && !double_table(ht)) return false; + if (ht->elems+1 + ht->deleted > ht->max_with_deleted) + rehash_table(ht); assert(p); if (((uintptr_t)p & ht->common_mask) != ht->common_bits) update_common(ht, p); @@ -216,26 +244,6 @@ bool htable_add(struct htable *ht, size_t hash, const void *p) return true; } -/* If every one of the following buckets are DELETED (up to the next unused - one), we can actually mark them all unused. */ -static void delete_run(struct htable *ht, unsigned int num) -{ - unsigned int i, last = num + 1; - size_t mask = (((size_t)1 << ht->bits)-1); - - while (ht->table[last & mask]) { - if (entry_is_valid(ht->table[last & mask])) - return; - last++; - } - - /* Now see if we can step backwards to find previous deleted ones. */ - for (i = num-1; ht->table[i & mask] == HTABLE_DELETED; i--); - - for (i++; i < last; i++) - ht->table[i & ((1 << ht->bits)-1)] = 0; -} - bool htable_del(struct htable *ht, size_t h, const void *p) { struct htable_iter i; @@ -257,5 +265,5 @@ void htable_delval(struct htable *ht, struct htable_iter *i) ht->elems--; ht->table[i->off] = HTABLE_DELETED; - delete_run(ht, i->off); + ht->deleted++; } -- 2.39.2