From: Rusty Russell Date: Mon, 26 Mar 2018 10:36:34 +0000 (+1030) Subject: intmap: reimplement so that intmap_after works. X-Git-Url: http://git.ozlabs.org/?p=ccan;a=commitdiff_plain;h=e59acd521d11c1f392264762d79195fd6cc2b992;hp=82229812012a9cc9a4876b9542f5539ec99ece46 intmap: reimplement so that intmap_after works. A critbit tree is a binary tree which keeps branches for each bit which differs in the leaves. It's a simple data structure, but not entirely simple to implement the primitives people expect, as this bus shows. The bug: I added an open iterator, and intmap_after_ for a random value would sometimes return the wrong node. Cause: we don't know what the prefix is as we iterate, so by only testing the critbits in the tree, we can end up in the wrong place. This is only a problem if the value isn't in the (sub)tree, but this can easily happen even with contiguous trees should deletion occur. You can see an example in the next patch, which adds a test. After finding a bug in my intmap_after() routine, I went searching for other implementations to see how they handled it. Most didn't provide an open-ended iterator like this, relying on callback iterators which don't allow deletion. Gah! The exception was https://github.com/blynn/blt/blob/master/blt.c#L179 which implements blt_ceil() which does this (if you add one to the key, at least). However, it does it by effectively finding a node, using that to derive the prefix, then walking down the tree again. That's pretty suboptimal. There are basically two choices if you want an efficient after() operation: to reimplement this approach with some optimizations (ie. keep branches as we descend, and when we get to the bottom and know the prefix, we know which branch to go down), or keep the bits which got to each node. The latter is more optimal, but less generally useful: for bit strings, for example, we could keep the bits in common on each node, rather than storing the entire string at the bottom. But in practice you'd be doing allocations to re-create the index if the caller wanted it. However, in this implementation our keys are 64 bits only, and we already use a u8 for the bit number: using a 64-bit value there consumes no more space (thanks to alignment). We can store the critbit by using the prefix capped by a bit: 0b10000...0000 means no prefix and highest bit is the critbit, and 0bxxxxx1000...000 means the prefix is xxxxxx and the critbit is the 6th highest bit. The penalty is that iteration 70% slower. It's still pretty fast though. Before: $ for i in `seq 5`; do ./speed 10000000; done | stats 10000000,random generation (nsec),3-4(3.2+/-0.4) 10000000,critbit insert (nsec),1530-1751(1633.2+/-80) 10000000,critbit successful lookup (nsec),1723-1993(1806.8+/-97) 10000000,critbit failed lookup (nsec),1763-2104(1933.6+/-1.3e+02) 10000000,critbit iteration (nsec),208-266(242.2+/-19) 10000000,critbit memory (bytes),48 10000000,critbit delete (nsec),1747-1861(1803.8+/-42) 10000000,critbit consecutive iteration (nsec),182-228(210+/-18) After: 10000000,random generation (nsec),3-4(3.2+/-0.4) 10000000,critbit insert (nsec),1533-1699(1628+/-65) 10000000,critbit successful lookup (nsec),1831-2104(1972.4+/-1e+02) 10000000,critbit failed lookup (nsec),1850-2152(2008.2+/-1.1e+02) 10000000,critbit iteration (nsec),304-324(312.8+/-7.5) 10000000,critbit memory (bytes),48 10000000,critbit delete (nsec),1617-1872(1752+/-99) 10000000,critbit consecutive iteration (nsec),303-318(311+/-5.4) Signed-off-by: Rusty Russell --- diff --git a/ccan/intmap/_info b/ccan/intmap/_info index d9ba4622..1b64e1b6 100644 --- a/ccan/intmap/_info +++ b/ccan/intmap/_info @@ -22,7 +22,7 @@ int main(int argc, char *argv[]) return 1; if (strcmp(argv[1], "depends") == 0) { - printf("ccan/ilog\n" + printf("ccan/bitops\n" "ccan/short_types\n" "ccan/str\n" "ccan/tcon\n" diff --git a/ccan/intmap/benchmark/speed.c b/ccan/intmap/benchmark/speed.c index 7f59af29..e74b12aa 100644 --- a/ccan/intmap/benchmark/speed.c +++ b/ccan/intmap/benchmark/speed.c @@ -13,8 +13,8 @@ struct node { /* These point to strings or nodes. */ struct intmap child[2]; - /* The bit where these children differ (0 == lsb) */ - uint8_t bit_num; + /* Encoding both prefix and critbit: 1 is appended to prefix. */ + intmap_index_t prefix_and_critbit; }; static void update_span(const void *p, size_t s, uintptr_t *min, uintptr_t *max) diff --git a/ccan/intmap/intmap.c b/ccan/intmap/intmap.c index 447f8eec..ab6ddc9d 100644 --- a/ccan/intmap/intmap.c +++ b/ccan/intmap/intmap.c @@ -1,9 +1,9 @@ /* CC0 license (public domain) - see LICENSE file for details */ /* This code is based on ccan/strmap.c. */ +#include #include #include #include -#include #include #include #include @@ -11,28 +11,38 @@ struct node { /* These point to strings or nodes. */ struct intmap child[2]; - /* The bit where these children differ (0 == lsb) */ - u8 bit_num; + /* Encoding both prefix and critbit: 1 is appended to prefix. */ + intmap_index_t prefix_and_critbit; }; -/* Closest member to this in a non-empty map. */ -static struct intmap *closest(struct intmap *n, intmap_index_t index) +static int critbit(const struct intmap *n) { - /* Anything with NULL value is a node. */ - while (!n->v) { - u8 direction = (index >> n->u.n->bit_num) & 1; - n = &n->u.n->child[direction]; - } - return n; + return bitops_ls64(n->u.n->prefix_and_critbit); } -void *intmap_get_(const struct intmap *map, intmap_index_t index) +static intmap_index_t prefix_mask(int critbit) { - struct intmap *n; + /* Mask does not include critbit itself, but can't shift by critbit+1 */ + return -2ULL << critbit; +} + +static intmap_index_t prefix_and_critbit(intmap_index_t v, int n) +{ + intmap_index_t critbit = ((intmap_index_t)1 << n); + return (v & ~(critbit - 1)) | critbit; +} +void *intmap_get_(const struct intmap *map, intmap_index_t index) +{ /* Not empty map? */ if (!intmap_empty_(map)) { - n = closest((struct intmap *)map, index); + const struct intmap *n = map; + /* Anything with NULL value is a node. */ + while (!n->v) { + /* FIXME: compare cmp prefix, if not equal, ENOENT */ + u8 direction = (index >> critbit(n)) & 1; + n = &n->u.n->child[direction]; + } if (index == n->u.i) return n->v; } @@ -40,36 +50,18 @@ void *intmap_get_(const struct intmap *map, intmap_index_t index) return NULL; } -bool intmap_add_(struct intmap *map, intmap_index_t index, const void *value) +static bool split_node(struct intmap *n, intmap_index_t nodeindex, + intmap_index_t index, const void *value) { - struct intmap *n; struct node *newn; - u8 bit_num, new_dir; - - assert(value); - - /* Empty map? */ - if (intmap_empty_(map)) { - map->u.i = index; - map->v = (void *)value; - return true; - } - - /* Find closest existing member. */ - n = closest(map, index); + int new_dir; /* Find highest bit where they differ. */ - bit_num = ilog64(n->u.i ^ index); - if (bit_num == 0) { - errno = EEXIST; - return false; - } - bit_num--; - - assert(bit_num < CHAR_BIT*sizeof(index)); + unsigned int critbit = bitops_hs64(nodeindex ^ index); + assert(critbit < CHAR_BIT*sizeof(index)); /* Which direction do we go at this bit? */ - new_dir = (index >> bit_num) & 1; + new_dir = (index >> critbit) & 1; /* Allocate new node. */ newn = malloc(sizeof(*newn)); @@ -77,27 +69,48 @@ bool intmap_add_(struct intmap *map, intmap_index_t index, const void *value) errno = ENOMEM; return false; } - newn->bit_num = bit_num; + newn->prefix_and_critbit = prefix_and_critbit(index, critbit); newn->child[new_dir].v = (void *)value; newn->child[new_dir].u.i = index; + newn->child[!new_dir] = *n; + + n->u.n = newn; + n->v = NULL; + return true; +} + +bool intmap_add_(struct intmap *map, intmap_index_t index, const void *value) +{ + struct intmap *n; + + assert(value); + + /* Empty map? */ + if (intmap_empty_(map)) { + map->u.i = index; + map->v = (void *)value; + return true; + } - /* Find where to insert: not closest, but first which differs! */ n = map; + /* Anything with NULL value is a node. */ while (!n->v) { - u8 direction; + int crit = critbit(n); + intmap_index_t mask = prefix_mask(crit); + u8 direction = (index >> crit) & 1; - /* Subtle: bit numbers are "backwards" for comparison */ - if (n->u.n->bit_num < bit_num) - break; - - direction = (index >> n->u.n->bit_num) & 1; + if ((index & mask) != (n->u.n->prefix_and_critbit & mask)) + return split_node(n, n->u.n->prefix_and_critbit & mask, + index, value); n = &n->u.n->child[direction]; } - newn->child[!new_dir] = *n; - n->u.n = newn; - n->v = NULL; - return true; + if (index == n->u.i) { + errno = EEXIST; + return false; + } + + return split_node(n, n->u.i, index, value); } void *intmap_del_(struct intmap *map, intmap_index_t index) @@ -116,8 +129,9 @@ void *intmap_del_(struct intmap *map, intmap_index_t index) n = map; /* Anything with NULL value is a node. */ while (!n->v) { + /* FIXME: compare cmp prefix, if not equal, ENOENT */ parent = n; - direction = (index >> n->u.n->bit_num) & 1; + direction = (index >> critbit(n)) & 1; n = &n->u.n->child[direction]; } @@ -163,38 +177,58 @@ void *intmap_first_(const struct intmap *map, intmap_index_t *indexp) void *intmap_after_(const struct intmap *map, intmap_index_t *indexp) { const struct intmap *n, *prev = NULL; + intmap_index_t index = (*indexp) + 1; + + /* Special case of overflow */ + if (index == 0) + goto none_left; /* Special case of empty map */ - if (intmap_empty_(map)) { - errno = ENOENT; - return NULL; - } + if (intmap_empty_(map)) + goto none_left; - /* Follow down, track the last place where we could have set a bit - * instead of clearing it: this is the higher alternative tree. */ + /* Follow down, until prefix differs. */ n = map; while (!n->v) { - u8 direction = (*indexp >> n->u.n->bit_num) & 1; + int crit = critbit(n); + u8 direction; + intmap_index_t prefix, idx; + + idx = (index >> crit); + direction = idx & 1; + + /* Leave critbit in place: we can't shift by 64 anyway */ + idx |= 1; + prefix = n->u.n->prefix_and_critbit >> crit; + + /* If this entire tree is greater than index, take first */ + if (idx < prefix) + return intmap_first_(n, indexp); + /* If this entire tree is less than index, we're past it. */ + else if (idx > prefix) + goto try_greater_tree; + + /* Remember greater tree for backtracking */ if (!direction) prev = n; n = &n->u.n->child[direction]; } /* Found a successor? */ - if (n->u.i > *indexp) { + if (n->u.i >= index) { errno = 0; *indexp = n->u.i; return n->v; } - /* Nowhere to go back up to? */ - if (!prev) { - errno = ENOENT; - return NULL; - } +try_greater_tree: + /* If we ever took a lesser branch, go back to greater branch */ + if (prev) + return intmap_first_(&prev->u.n->child[1], indexp); - /* Get first one from that other branch. */ - return intmap_first_(&prev->u.n->child[1], indexp); +none_left: + errno = ENOENT; + return NULL; } void *intmap_last_(const struct intmap *map, intmap_index_t *indexp)