X-Git-Url: https://git.ozlabs.org/?a=blobdiff_plain;ds=sidebyside;f=ccan%2Fstrset%2Fstrset.c;fp=ccan%2Fstrset%2Fstrset.c;h=8088db47e808e3adb40f5bf583a6a8e6babeec59;hb=ab83de953730f5e5e571dbf69ffb3cc685a102dc;hp=0000000000000000000000000000000000000000;hpb=0845e79650c9257aa0ddef8ff99fd815b5edffac;p=ccan diff --git a/ccan/strset/strset.c b/ccan/strset/strset.c new file mode 100644 index 00000000..8088db47 --- /dev/null +++ b/ccan/strset/strset.c @@ -0,0 +1,298 @@ +/* This code is based on the public domain code at + * http://github.com/agl/critbit writtem by Adam Langley + * . + * + * Here are the main implementation differences: + * (1) We don't strdup the string on insert; we use the pointer we're given. + * (2) We use a straight bit number rather than a mask; it's simpler. + * (3) We don't use the bottom bit of the pointer, but instead use a leading + * zero to distinguish nodes from strings. + * (4) The empty string (which would look like a node) is handled + * using a special "empty node". + * (5) Delete returns the string, so you can free it if you want to. + * (6) Unions instead of void *, bool instead of int. + */ +#include +#include +#include +#include +#include +#include +#include + +struct node { + /* To differentiate us from strings. */ + char nul_byte; + /* The bit where these children differ. */ + u8 bit_num; + /* The byte number where first bit differs (-1 == empty string node). */ + size_t byte_num; + /* These point to strings or nodes. */ + struct strset child[2]; +}; + +/* Closest member to this in a non-empty set. */ +static const char *closest(struct strset n, const char *member) +{ + size_t len = strlen(member); + const u8 *bytes = (const u8 *)member; + + /* Anything with first byte 0 is a node. */ + while (!n.u.s[0]) { + u8 direction = 0; + + /* Special node which represents the empty string. */ + if (unlikely(n.u.n->byte_num == (size_t)-1)) { + n = n.u.n->child[0]; + break; + } + + if (n.u.n->byte_num < len) { + u8 c = bytes[n.u.n->byte_num]; + direction = (c >> n.u.n->bit_num) & 1; + } + n = n.u.n->child[direction]; + } + return n.u.s; +} + +char *strset_test(const struct strset *set, const char *member) +{ + const char *str; + + /* Empty set? */ + if (!set->u.n) + return NULL; + str = closest(*set, member); + if (streq(member, str)) + return (char *)str; + return NULL; +} + +static bool set_string(struct strset *set, + struct strset *n, const char *member) +{ + /* Substitute magic empty node if this is the empty string */ + if (unlikely(!member[0])) { + n->u.n = malloc(sizeof(*n->u.n)); + if (unlikely(!n->u.n)) + return false; + n->u.n->nul_byte = '\0'; + n->u.n->byte_num = (size_t)-1; + /* Attach the string to child[0] */ + n = &n->u.n->child[0]; + } + n->u.s = member; + return true; +} + +bool strset_set(struct strset *set, const char *member) +{ + size_t len = strlen(member); + const u8 *bytes = (const u8 *)member; + struct strset *np; + const char *str; + struct node *newn; + size_t byte_num; + u8 bit_num, new_dir; + + /* Empty set? */ + if (!set->u.n) { + return set_string(set, set, member); + } + + /* Find closest existing member. */ + str = closest(*set, member); + + /* Find where they differ. */ + for (byte_num = 0; str[byte_num] == member[byte_num]; byte_num++) { + if (member[byte_num] == '\0') { + /* All identical! */ + return false; + } + } + + /* Find which bit differs (if we had ilog8, we'd use it) */ + bit_num = ilog32_nz((u8)str[byte_num] ^ bytes[byte_num]) - 1; + assert(bit_num < CHAR_BIT); + + /* Which direction do we go at this bit? */ + new_dir = ((bytes[byte_num]) >> bit_num) & 1; + + /* Allocate new node. */ + newn = malloc(sizeof(*newn)); + if (!newn) { + /* FIXME */ + return false; + } + newn->nul_byte = '\0'; + newn->byte_num = byte_num; + newn->bit_num = bit_num; + if (unlikely(!set_string(set, &newn->child[new_dir], member))) { + free(newn); + return false; + } + + /* Find where to insert: not closest, but first which differs! */ + np = set; + while (!np->u.s[0]) { + u8 direction = 0; + + /* Special node which represents the empty string will + * break here too! */ + if (np->u.n->byte_num > byte_num) + break; + /* Subtle: bit numbers are "backwards" for comparison */ + if (np->u.n->byte_num == byte_num && np->u.n->bit_num < bit_num) + break; + + if (np->u.n->byte_num < len) { + u8 c = bytes[np->u.n->byte_num]; + direction = (c >> np->u.n->bit_num) & 1; + } + np = &np->u.n->child[direction]; + } + + newn->child[!new_dir]= *np; + np->u.n = newn; + return true; +} + +char *strset_clear(struct strset *set, const char *member) +{ + size_t len = strlen(member); + const u8 *bytes = (const u8 *)member; + struct strset *parent = NULL, *n; + const char *ret = NULL; + u8 direction = 0; /* prevent bogus gcc warning. */ + + /* Empty set? */ + if (!set->u.n) + return NULL; + + /* Find closest, but keep track of parent. */ + n = set; + /* Anything with first byte 0 is a node. */ + while (!n->u.s[0]) { + u8 c = 0; + + /* Special node which represents the empty string. */ + if (unlikely(n->u.n->byte_num == (size_t)-1)) { + const char *empty_str = n->u.n->child[0].u.s; + + if (member[0]) + return NULL; + + /* Sew empty string back so remaining logic works */ + free(n->u.n); + n->u.s = empty_str; + break; + } + + parent = n; + if (n->u.n->byte_num < len) { + c = bytes[n->u.n->byte_num]; + direction = (c >> n->u.n->bit_num) & 1; + } else + direction = 0; + n = &n->u.n->child[direction]; + } + + /* Did we find it? */ + if (!streq(member, n->u.s)) + return NULL; + + ret = n->u.s; + + if (!parent) { + /* We deleted last node. */ + set->u.n = NULL; + } else { + struct node *old = parent->u.n; + /* Raise other node to parent. */ + *parent = old->child[!direction]; + free(old); + } + + return (char *)ret; +} + +static bool iterate(struct strset n, + bool (*handle)(const char *, void *), void *data) +{ + if (n.u.s[0]) + return handle(n.u.s, data); + if (unlikely(n.u.n->byte_num == (size_t)-1)) + return handle(n.u.n->child[0].u.s, data); + + return iterate(n.u.n->child[0], handle, data) + || iterate(n.u.n->child[1], handle, data); +} + +void strset_iterate_(const struct strset *set, + bool (*handle)(const char *, void *), void *data) +{ + /* Empty set? */ + if (!set->u.n) + return; + + iterate(*set, handle, data); +} + +const struct strset *strset_prefix(const struct strset *set, const char *prefix) +{ + const struct strset *n, *top; + size_t len = strlen(prefix); + const u8 *bytes = (const u8 *)prefix; + + /* Empty set -> return empty set. */ + if (!set->u.n) + return set; + + top = n = set; + + /* We walk to find the top, but keep going to check prefix matches. */ + while (!n->u.s[0]) { + u8 c = 0, direction; + + /* Special node which represents the empty string. */ + if (unlikely(n->u.n->byte_num == (size_t)-1)) { + n = &n->u.n->child[0]; + break; + } + + if (n->u.n->byte_num < len) + c = bytes[n->u.n->byte_num]; + + direction = (c >> n->u.n->bit_num) & 1; + n = &n->u.n->child[direction]; + if (c) + top = n; + } + + if (!strstarts(n->u.s, prefix)) { + /* Convenient return for prefixes which do not appear in set. */ + static const struct strset empty_set; + return &empty_set; + } + + return top; +} + +static void destroy(struct strset n) +{ + if (!n.u.s[0]) { + if (likely(n.u.n->byte_num != (size_t)-1)) { + destroy(n.u.n->child[0]); + destroy(n.u.n->child[1]); + } + free(n.u.n); + } +} + +void strset_destroy(struct strset *set) +{ + if (set->u.n) + destroy(*set); + set->u.n = NULL; +}