2 * Copyright (c) 2004 Anders Magnusson (ragge@ludd.luth.se).
3 * Copyright (c) 2009 Joseph Adams (joeyadams3.14159@gmail.com).
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 /* This is a heavily modified version of the Patricia tree implementation
30 in PCC at http://pcc.zentus.com/cgi-bin/cvsweb.cgi/cc/cpp/cpp.c?rev=1.96 */
32 #include <ccan/stringmap/stringmap.h>
34 //#define CONSISTENCY_CHECK
/* Override assert() with an empty statement so the assertions in this file
   expand to nothing wherever this definition is in effect. */
39 #define assert(...) do {} while(0)
/* Extract bit number `bit` of the byte string `key`, counting from the most
   significant bit of key[0] (bit 0) onward: bit >> 3 selects the byte and
   ~bit & 7 (that is, 7 - bit % 8) is the shift within that byte.
   Evaluates to 0 or 1.
   Both arguments are parenthesized so that expressions such as `i + 1` or
   `p + 1` can be passed safely. `bit` is expanded twice, so it must not have
   side effects. The byte is read as unsigned char so the right shift never
   operates on a negative value. */
#define PEEK_BIT(key, bit) (((unsigned char)(key)[(bit) >> 3] >> (~(bit) & 7)) & 1)
/* Internal (branching) node of the tree: a bit position plus two children. */
44 struct stringmap_node {
// left_is_leaf / right_is_leaf flag whether lr[0] / lr[1] refers to a leaf
// (leaf() casts such a child to struct T *); bitno is the bit index that
// lookups test at this node.
45 uint32_t left_is_leaf:1, right_is_leaf:1, bitno:30;
// Children: lr[0] is the left child, lr[1] the right child.
46 struct stringmap_node *lr[2];
/* Return child `lr` of n (0 = left, 1 = right) viewed as a leaf (struct T *).
   The requested side is expected to have its *_is_leaf flag set; this is
   only checked through assert(). */
54 static inline struct T *leaf(struct stringmap_node *n, int lr) {
55 assert(lr ? n->right_is_leaf : n->left_is_leaf);
56 return (struct T*)n->lr[lr];
59 /* Normal nodes diverge because there was a 0 or 1 difference. If left_ends(n),
60 then the node diverges because one string ends and the rest don't. */
/* Returns nonzero iff the left child of n is a leaf whose key length in bits
   (len << 3) is exactly n->bitno, i.e. the left key stops at this node. */
61 static inline int left_ends(struct stringmap_node *n) {
62 return (n->left_is_leaf && (leaf(n,0)->len << 3)==n->bitno);
/* Allocate a zero-filled leaf of T_size bytes from block pool bp and give it
   its own copy of the first len bytes of key.
   bp:     block pool that both the leaf and the key copy are taken from.
   key:    source bytes for the key; len bytes are copied.
   T_size: size in bytes of the leaf object to allocate. */
65 static void *T_new(struct block_pool *bp, const char *key, size_t len, size_t T_size) {
66 struct T *leaf = block_pool_alloc(bp, T_size);
67 memset(leaf, 0, T_size);
// len+1 bytes with alignment 1: one byte more than the key itself,
// presumably room for a terminating NUL -- TODO confirm.
69 leaf->str = block_pool_alloc_align(bp, len+1, 1);
70 memcpy(leaf->str, key, len);
77 //used for diagnostics
// Forward declarations of the diagnostic helpers defined further down in this
// file: the tree consistency checker and the Graphviz dot emitters.
78 static int consistency_check(struct stringmap *t);
79 static void emit_dot(struct stringmap *t);
80 static void emit_subtree(struct stringmap_node *n, int is_leaf);
/* Look up key in the map t and, on the insertion path, add a new leaf for it.
   t:      the map; its root, count and block pool (bp) are read and updated.
   key:    key bytes to look up.
   len:    key length in bytes, or (size_t)-1 when the length was not supplied
           and has to be calculated here.
   enterf: selects between pure lookup and lookup-or-insert (presumably
           nonzero means insert when the key is absent -- TODO confirm).
   T_size: size in bytes of a leaf object, forwarded to T_new().
   Returns NULL when no matching string is found and nothing is entered; on
   the insertion paths the newly created leaf (new->lr[bit]) is returned. */
82 void *stringmap_lookup_real(struct stringmap *t, const char *key, size_t len, int enterf, size_t T_size) {
84 struct stringmap_node *w, *new, *last;
85 uint32_t cix, bit, svbit, ix, bitno, end_bit;
// Mention consistency_check without calling it, presumably so it does not
// count as unused when the checks are compiled out.
88 (void) consistency_check;
90 #ifdef STRINGMAP_EMIT_DOT
93 #ifdef CONSISTENCY_CHECK
97 /* If key length wasn't supplied, calculate it. */
98 if (len == (size_t)-1)
102 /* If tree is empty, create the first node. */
107 t->bp = block_pool_new(t->bp);
// The first entry is stored as a bare leaf directly in t->root.
109 t->root = T_new(t->bp, key, len, T_size);
115 /* Follow the tree down to what might be the target key. */
122 if (!left_ends(w)) //0 or 1
// Once w->bitno is at or beyond end_bit the key has no such bit; 0 is used.
123 bit = w->bitno < end_bit ? PEEK_BIT(key, w->bitno) : 0;
124 else //ends or doesn't end
// Go left (0) only when w->bitno equals end_bit, otherwise right (1).
125 bit = (w->bitno != end_bit);
// svbit records whether the child on the chosen side is a leaf.
126 svbit = bit ? w->right_is_leaf : w->left_is_leaf;
133 /* See if the strings match. If not, set cix to the first bit offset
134 where there's a difference, and bit to the side on which to put
139 for (cix = 0; ; m++, k++, cix++) {
140 if (cix>=sp->len || cix>=len) { //we reached the end of one or both strings
141 if (cix==sp->len && cix==len) { //strings match
142 //if (!enterf && sp->value == NULL)
148 //put the shorter key to the left
153 if (*m != *k) { //the strings have a differing character
156 //advance cix to the first differing bit
158 while ((ix & 128) == 0)
161 //choose left/right based on the differing bit
162 bit = PEEK_BIT(key, cix);
169 return NULL; /* no string found and do not enter */
171 /* Create new node */
172 new = block_pool_alloc(t->bp, sizeof *new);
// The new key becomes a leaf on side `bit` of the new node.
174 new->right_is_leaf = bit;
175 new->left_is_leaf = !bit;
178 new->lr[bit] = T_new(t->bp, key, len, T_size);
// t->count was 1 before this insertion: the previous root becomes the other
// child and both sides of the new node are marked as leaves.
180 if (t->count++ == 1) {
181 new->lr[!bit] = t->root;
182 new->right_is_leaf = 1;
183 new->left_is_leaf = 1;
185 return (struct T *)new->lr[bit];
195 if (!left_ends(w)) { //0 or 1
198 svbit = PEEK_BIT(key, bitno);
200 } else { //ends or doesn't end
201 //because left is an end, we cannot split it, so we must turn right
207 if (svbit ? last->right_is_leaf : last->left_is_leaf) {
208 //w is a leaf, so mark it accordingly in its parent structure
210 new->right_is_leaf = 1;
212 new->left_is_leaf = 1;
// Link the new node under last on side svbit; a leaf flag of last is reset
// to 0 because that child is now an internal node.
222 last->lr[svbit] = new;
224 last->right_is_leaf = 0;
226 last->left_is_leaf = 0;
229 return (struct T *)new->lr[bit];
/* Recursively check structural invariants of the subtree rooted at n and
   print a diagnostic with printf() for each kind of violation tested below.
   The final result is nonzero only if every non-leaf child also checks out;
   a leaf child counts as OK at that step. */
232 static int consistency_check_subtree(struct stringmap_node *n) {
233 uint32_t bitno = n->bitno;
236 //make sure bitnos ascend (must ascend unless left ends)
// NOTE(review): the two bitno checks apply to non-leaf (internal) children,
// even though the messages say leaf.
237 if (!n->left_is_leaf && bitno >= n->lr[0]->bitno) {
238 printf("Left leaf has bitno >= than parent\n");
241 if (!n->right_is_leaf && bitno >= n->lr[1]->bitno) {
// An equal bitno on the right is tolerated when the left leaf ends at this bit.
242 if (left_ends(n) && bitno == n->lr[1]->bitno) {
243 //fine, there's a shelf here
245 printf("Right leaf has bitno >= than parent\n");
250 //make sure eponymous bits are set properly
251 if (n->left_is_leaf) {
252 struct T *lf = leaf(n, 0);
253 size_t len = lf->len << 3;
// A left leaf whose bit length equals bitno is the left_ends() case.
254 if (len == n->bitno) {
256 } else if (len <= n->bitno) {
257 printf("Left leaf is too short\n");
// Otherwise the left leaf is expected to have a 0 at bitno.
259 } else if (PEEK_BIT(lf->str, n->bitno) == 1) {
260 printf("Left leaf has incorrect bit\n");
264 if (n->right_is_leaf) {
265 struct T *lf = leaf(n, 1);
266 size_t len = lf->len << 3;
267 if (len <= n->bitno) {
268 printf("Right leaf is too short\n");
// A right leaf is expected to have a 1 at bitno unless the left leaf ends here.
270 } else if (PEEK_BIT(lf->str, n->bitno) == 0 && !left_ends(n)) {
271 printf("Right leaf has incorrect bit\n");
277 //emit_subtree(n, 0);
// Descend only into children that are internal nodes.
282 return (!n->left_is_leaf ? consistency_check_subtree(n->lr[0]) : 1) &&
283 (!n->right_is_leaf ? consistency_check_subtree(n->lr[1]) : 1);
/* Check the map t by running consistency_check_subtree() on its root and
   returning that result. */
286 static int consistency_check(struct stringmap *t) {
289 return consistency_check_subtree(t->root);
292 //The following can be used to create Graphviz "dot" files to visualize the tree
/* Write the text for one leaf (lp) to f for use in a Graphviz dot file: the
   key bits as 0/1 characters with a backslash-n escape after every complete
   byte, followed by the key string in parentheses and a closing quote. */
294 static void leaf_to_dot(void *lp, FILE *f) {
// Number of bits in the key: length in bytes times 8.
296 size_t bit_count = leaf->len << 3;
301 for (i=0; i<bit_count; i++) {
302 putc(PEEK_BIT(leaf->str, i) ? '1' : '0', f);
// True after every eighth bit, i.e. at each byte boundary.
303 if (((i+1) & 7) == 0)
304 fputs("\\n", f); //add newlines between bytes
308 fprintf(f, "(%s)\"\n", leaf->str);
/* Write internal node n and everything below it to f in dot syntax.
   Nodes are named by their address printed as an integer, and the label shows
   the nesting level in brackets followed by the node bitno. Leaf children go
   through leaf_to_dot(); internal children get an edge line and a recursive
   call with level+1. */
311 static void node_to_dot(struct stringmap_node *n, FILE *f, size_t level) {
312 //don't draw ridiculously huge trees
316 fprintf(f, "%zu [label=\"[%zu] %u\"]\n", (size_t)n, level, n->bitno);
318 if (n->left_is_leaf) {
// Edge prefix only; leaf_to_dot() supplies the target text.
319 fprintf(f, "%zu -> ", (size_t)n);
320 leaf_to_dot(n->lr[0], f);
322 fprintf(f, "%zu -> %zu \n", (size_t)n, (size_t)n->lr[0]);
323 node_to_dot(n->lr[0], f, level+1);
326 if (n->right_is_leaf) {
327 fprintf(f, "%zu -> ", (size_t)n);
328 leaf_to_dot(n->lr[1], f);
330 fprintf(f, "%zu -> %zu \n", (size_t)n, (size_t)n->lr[1]);
331 node_to_dot(n->lr[1], f, level+1);
/* Open filename_out for writing and write a Graphviz digraph for the subtree
   rooted at n, starting the node output at level 0.
   NOTE(review): no check of the fopen() result is visible here -- confirm
   that a failed open is handled before f is written to. */
335 static void stringmap_subtree_to_dot(struct stringmap_node *n, int is_leaf, const char *filename_out) {
336 FILE *f = fopen(filename_out, "w");
338 fputs("digraph G {\n", f);
343 node_to_dot(n, f, 0);
// Sequence number used to name the next dot file; starts at 0.
349 static size_t dot_file_number = 0;
/* Write the subtree n to the next numbered file under dot/ (the number is
   zero-padded to at least four digits) and advance the file counter.
   NOTE(review): sprintf() writes into buf without a bound -- confirm that buf
   is large enough for the longest name, or use snprintf(). */
351 static void emit_subtree(struct stringmap_node *n, int is_leaf) {
353 sprintf(buf, "dot/%04zu.dot", dot_file_number++);
354 stringmap_subtree_to_dot(n, is_leaf, buf);
/* Dump the whole map t through emit_subtree(), passing its root and flagging
   it as a leaf when t->count is 1. */
357 static void emit_dot(struct stringmap *t) {
359 emit_subtree(t->root, t->count==1);