1 /* This code is based on the public domain code at
2 * http://github.com/agl/critbit writtem by Adam Langley
3 * <agl@imperialviolet.org>.
5 * Here are the main implementation differences:
6 * (1) We don't strdup the string on insert; we use the pointer we're given.
7 * (2) We use a straight bit number rather than a mask; it's simpler.
8 * (3) We don't use the bottom bit of the pointer, but instead use a leading
9 * zero to distinguish nodes from strings.
10 * (4) The empty string (which would look like a node) is handled
11 * using a special "empty node".
12 * (5) Delete returns the string, so you can free it if you want to.
13 * (6) Unions instead of void *, bool instead of int.
15 #include <ccan/strset/strset.h>
16 #include <ccan/short_types/short_types.h>
17 #include <ccan/likely/likely.h>
18 #include <ccan/str/str.h>
19 #include <ccan/ilog/ilog.h>
25 /* To differentiate us from strings. */
27 /* The bit where these children differ. */
29 /* The byte number where first bit differs (-1 == empty string node). */
31 /* These point to strings or nodes. */
32 struct strset child[2];
35 /* Closest member to this in a non-empty set. */
36 static const char *closest(struct strset n, const char *member)
38 size_t len = strlen(member);
39 const u8 *bytes = (const u8 *)member;
41 /* Anything with first byte 0 is a node. */
45 /* Special node which represents the empty string. */
46 if (unlikely(n.u.n->byte_num == (size_t)-1)) {
51 if (n.u.n->byte_num < len) {
52 u8 c = bytes[n.u.n->byte_num];
53 direction = (c >> n.u.n->bit_num) & 1;
55 n = n.u.n->child[direction];
60 char *strset_get(const struct strset *set, const char *member)
66 str = closest(*set, member);
67 if (streq(member, str))
74 static bool set_string(struct strset *set,
75 struct strset *n, const char *member)
77 /* Substitute magic empty node if this is the empty string */
78 if (unlikely(!member[0])) {
79 n->u.n = malloc(sizeof(*n->u.n));
80 if (unlikely(!n->u.n)) {
84 n->u.n->nul_byte = '\0';
85 n->u.n->byte_num = (size_t)-1;
86 /* Attach the string to child[0] */
87 n = &n->u.n->child[0];
93 bool strset_add(struct strset *set, const char *member)
95 size_t len = strlen(member);
96 const u8 *bytes = (const u8 *)member;
105 return set_string(set, set, member);
108 /* Find closest existing member. */
109 str = closest(*set, member);
111 /* Find where they differ. */
112 for (byte_num = 0; str[byte_num] == member[byte_num]; byte_num++) {
113 if (member[byte_num] == '\0') {
120 /* Find which bit differs (if we had ilog8, we'd use it) */
121 bit_num = ilog32_nz((u8)str[byte_num] ^ bytes[byte_num]) - 1;
122 assert(bit_num < CHAR_BIT);
124 /* Which direction do we go at this bit? */
125 new_dir = ((bytes[byte_num]) >> bit_num) & 1;
127 /* Allocate new node. */
128 newn = malloc(sizeof(*newn));
133 newn->nul_byte = '\0';
134 newn->byte_num = byte_num;
135 newn->bit_num = bit_num;
136 if (unlikely(!set_string(set, &newn->child[new_dir], member))) {
141 /* Find where to insert: not closest, but first which differs! */
143 while (!np->u.s[0]) {
146 /* Special node which represents the empty string will
148 if (np->u.n->byte_num > byte_num)
150 /* Subtle: bit numbers are "backwards" for comparison */
151 if (np->u.n->byte_num == byte_num && np->u.n->bit_num < bit_num)
154 if (np->u.n->byte_num < len) {
155 u8 c = bytes[np->u.n->byte_num];
156 direction = (c >> np->u.n->bit_num) & 1;
158 np = &np->u.n->child[direction];
161 newn->child[!new_dir]= *np;
166 char *strset_del(struct strset *set, const char *member)
168 size_t len = strlen(member);
169 const u8 *bytes = (const u8 *)member;
170 struct strset *parent = NULL, *n;
171 const char *ret = NULL;
172 u8 direction = 0; /* prevent bogus gcc warning. */
180 /* Find closest, but keep track of parent. */
182 /* Anything with first byte 0 is a node. */
186 /* Special node which represents the empty string. */
187 if (unlikely(n->u.n->byte_num == (size_t)-1)) {
188 const char *empty_str = n->u.n->child[0].u.s;
195 /* Sew empty string back so remaining logic works */
202 if (n->u.n->byte_num < len) {
203 c = bytes[n->u.n->byte_num];
204 direction = (c >> n->u.n->bit_num) & 1;
207 n = &n->u.n->child[direction];
210 /* Did we find it? */
211 if (!streq(member, n->u.s)) {
219 /* We deleted last node. */
222 struct node *old = parent->u.n;
223 /* Raise other node to parent. */
224 *parent = old->child[!direction];
231 static bool iterate(struct strset n,
232 bool (*handle)(const char *, void *), const void *data)
235 return handle(n.u.s, (void *)data);
236 if (unlikely(n.u.n->byte_num == (size_t)-1))
237 return handle(n.u.n->child[0].u.s, (void *)data);
239 return iterate(n.u.n->child[0], handle, data)
240 && iterate(n.u.n->child[1], handle, data);
243 void strset_iterate_(const struct strset *set,
244 bool (*handle)(const char *, void *), const void *data)
250 iterate(*set, handle, data);
253 const struct strset *strset_prefix(const struct strset *set, const char *prefix)
255 const struct strset *n, *top;
256 size_t len = strlen(prefix);
257 const u8 *bytes = (const u8 *)prefix;
259 /* Empty set -> return empty set. */
265 /* We walk to find the top, but keep going to check prefix matches. */
269 /* Special node which represents the empty string. */
270 if (unlikely(n->u.n->byte_num == (size_t)-1)) {
271 n = &n->u.n->child[0];
275 if (n->u.n->byte_num < len)
276 c = bytes[n->u.n->byte_num];
278 direction = (c >> n->u.n->bit_num) & 1;
279 n = &n->u.n->child[direction];
284 if (!strstarts(n->u.s, prefix)) {
285 /* Convenient return for prefixes which do not appear in set. */
286 static const struct strset empty_set;
293 static void clear(struct strset n)
296 if (likely(n.u.n->byte_num != (size_t)-1)) {
297 clear(n.u.n->child[0]);
298 clear(n.u.n->child[1]);
304 void strset_clear(struct strset *set)