tdb2: change to using a hash tree.
[ccan] / ccan / tdb2 / test / layout.c
1 /* TDB tools to create various canned database layouts. */
2 #include "layout.h"
3 #include <stdlib.h>
4 #include <string.h>
5 #include <assert.h>
6 #include "logging.h"
7
8 struct tdb_layout *new_tdb_layout(void)
9 {
10         struct tdb_layout *layout = malloc(sizeof(*layout));
11         layout->num_elems = 0;
12         layout->elem = NULL;
13         return layout;
14 }
15
16 static void add(struct tdb_layout *layout, union tdb_layout_elem elem)
17 {
18         layout->elem = realloc(layout->elem,
19                                sizeof(layout->elem[0])
20                                * (layout->num_elems+1));
21         layout->elem[layout->num_elems++] = elem;
22 }
23
24 void tdb_layout_add_zone(struct tdb_layout *layout,
25                          unsigned int zone_bits,
26                          bool fill_prev)
27 {
28         union tdb_layout_elem elem;
29         if (fill_prev)
30                 tdb_layout_add_free(layout, 0);
31         elem.base.type = ZONE;
32         elem.zone.zone_bits = zone_bits;
33         add(layout, elem);
34 }
35
36 void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len)
37 {
38         union tdb_layout_elem elem;
39         elem.base.type = FREE;
40         elem.free.len = len;
41         add(layout, elem);
42 }
43
44 static struct tdb_data dup_key(struct tdb_data key)
45 {
46         struct tdb_data ret;
47         ret.dsize = key.dsize;
48         ret.dptr = malloc(ret.dsize);
49         memcpy(ret.dptr, key.dptr, ret.dsize);
50         return ret;
51 }
52
53 void tdb_layout_add_used(struct tdb_layout *layout,
54                          TDB_DATA key, TDB_DATA data,
55                          tdb_len_t extra)
56 {
57         union tdb_layout_elem elem;
58         elem.base.type = DATA;
59         elem.used.key = dup_key(key);
60         elem.used.data = dup_key(data);
61         elem.used.extra = extra;
62         add(layout, elem);
63 }
64
65 static tdb_len_t free_record_len(tdb_len_t len)
66 {
67         return sizeof(struct tdb_used_record) + len;
68 }
69
70 static tdb_len_t data_record_len(struct tle_used *used)
71 {
72         tdb_len_t len;
73         len = sizeof(struct tdb_used_record)
74                 + used->key.dsize + used->data.dsize + used->extra;
75         assert(len >= sizeof(struct tdb_free_record));
76         return len;
77 }
78
79 static tdb_len_t hashtable_len(struct tle_hashtable *htable)
80 {
81         return sizeof(struct tdb_used_record)
82                 + (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
83                 + htable->extra;
84 }
85
86 static tdb_len_t zone_header_len(struct tle_zone *zone)
87 {
88         return sizeof(struct free_zone_header)
89                 + sizeof(tdb_off_t) * (BUCKETS_FOR_ZONE(zone->zone_bits)+1);
90 }
91
92 static void set_free_record(void *mem, tdb_len_t len)
93 {
94         /* We do all the work in add_to_freetable */
95 }
96
97 static void set_data_record(void *mem, struct tdb_context *tdb,
98                             struct tle_zone *last_zone,
99                             struct tle_used *used)
100 {
101         struct tdb_used_record *u = mem;
102
103         set_header(tdb, u, used->key.dsize, used->data.dsize,
104                    used->key.dsize + used->data.dsize + used->extra,
105                    tdb_hash(tdb, used->key.dptr, used->key.dsize),
106                    last_zone->zone_bits);
107         memcpy(u + 1, used->key.dptr, used->key.dsize);
108         memcpy((char *)(u + 1) + used->key.dsize,
109                used->data.dptr, used->data.dsize);
110 }
111
112 static void set_hashtable(void *mem, struct tdb_context *tdb,
113                           struct tle_zone *last_zone,
114                           struct tle_hashtable *htable)
115 {
116         struct tdb_used_record *u = mem;
117         tdb_len_t len = sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS;
118
119         set_header(tdb, u, 0, len, len + htable->extra, 0,
120                    last_zone->zone_bits);
121         memset(u + 1, 0, len);
122 }
123
124 static void set_zone(void *mem, struct tdb_context *tdb,
125                      struct tle_zone *zone)
126 {
127         struct free_zone_header *fz = mem;
128         memset(fz, 0, zone_header_len(zone));
129         fz->zone_bits = zone->zone_bits;
130 }
131
132 static void add_to_freetable(struct tdb_context *tdb,
133                              struct tle_zone *last_zone,
134                              tdb_off_t eoff,
135                              tdb_off_t elen)
136 {
137         add_free_record(tdb, last_zone->zone_bits, eoff,
138                         sizeof(struct tdb_used_record) + elen);
139 }
140
141 static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned ingroup)
142 {
143         return group_start
144                 + (ingroup % (1 << TDB_HASH_GROUP_BITS)) * sizeof(tdb_off_t);
145 }
146
/*
 * Get bits from a value: returns the num bits of val beginning at bit
 * position start (bit 0 is the least significant), right-aligned.
 *
 * num may be anything from 0 to 32 inclusive.  A start at or beyond bit 64
 * selects nothing and yields 0.
 */
static uint32_t bits(uint64_t val, unsigned start, unsigned num)
{
        assert(num <= 32);

        /* Shifting a 64-bit value by 64 or more is undefined in C. */
        if (start >= 64)
                return 0;

        /*
         * Build the mask in 64 bits: with a 32-bit 1U, num == 32 would shift
         * by the full width of the type, which is undefined.
         */
        return (uint32_t)((val >> start) & (((uint64_t)1 << num) - 1));
}
153
/*
 * Consume the next num bits of the hash h, working down from the top bit.
 * *used counts how many bits have been consumed so far and is advanced by
 * num.  Taking bits from the top means whole sections of the hash can be
 * locked by using lock ranges.
 */
static uint32_t use_bits(uint64_t h, unsigned num, unsigned *used)
{
        const unsigned consumed = *used + num;

        *used = consumed;
        return bits(h, 64 - consumed, num);
}
161
162 static tdb_off_t encode_offset(tdb_off_t new_off, unsigned bucket,
163                                uint64_t h)
164 {
165         return bucket
166                 | new_off
167                 | ((uint64_t)bits(h, 64 - TDB_OFF_UPPER_STEAL_EXTRA,
168                                   TDB_OFF_UPPER_STEAL_EXTRA)
169                    << TDB_OFF_HASH_EXTRA_BIT);
170 }
171
172 /* FIXME: Our hash table handling here is primitive: we don't expand! */
173 static void add_to_hashtable(struct tdb_context *tdb,
174                              tdb_off_t eoff,
175                              struct tdb_data key)
176 {
177         uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
178         tdb_off_t b_off, group_start;
179         unsigned i, group, in_group;
180         unsigned used = 0;
181
182         group = use_bits(h, TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS, &used);
183         in_group = use_bits(h, TDB_HASH_GROUP_BITS, &used);
184
185         group_start = offsetof(struct tdb_header, hashtable)
186                 + group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
187
188         for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
189                 unsigned bucket = (in_group + i) % (1 << TDB_HASH_GROUP_BITS);
190
191                 b_off = hbucket_off(group_start, bucket);               
192                 if (tdb_read_off(tdb, b_off) == 0) {
193                         tdb_write_off(tdb, b_off,
194                                       encode_offset(eoff, bucket, h));
195                         return;
196                 }
197         }
198         abort();
199 }
200
201 /* FIXME: Support TDB_CONVERT */
202 struct tdb_context *tdb_layout_get(struct tdb_layout *layout)
203 {
204         unsigned int i;
205         tdb_off_t off, len;
206         tdb_len_t zone_left;
207         char *mem;
208         struct tdb_context *tdb;
209         struct tle_zone *last_zone = NULL;
210
211         assert(layout->elem[0].base.type == ZONE);
212
213         zone_left = 0;
214         off = sizeof(struct tdb_header);
215
216         /* First pass of layout: calc lengths */
217         for (i = 0; i < layout->num_elems; i++) {
218                 union tdb_layout_elem *e = &layout->elem[i];
219                 e->base.off = off;
220                 switch (e->base.type) {
221                 case ZONE:
222                         assert(zone_left == 0);
223                         len = zone_header_len(&e->zone);
224                         zone_left = 1ULL << e->zone.zone_bits;
225                         break;
226                 case FREE:
227                         if (e->free.len == 0)
228                                 e->free.len = zone_left
229                                         - sizeof(struct tdb_used_record);
230                         len = free_record_len(e->free.len);
231                         break;
232                 case DATA:
233                         len = data_record_len(&e->used);
234                         break;
235                 case HASHTABLE:
236                         len = hashtable_len(&e->hashtable);
237                         break;
238                 }
239                 off += len;
240                 assert(zone_left >= len);
241                 zone_left -= len;
242         }
243
244         /* Fill final zone with free record. */
245         if (zone_left != 0) {
246                 tdb_layout_add_free(layout,
247                                     zone_left
248                                     - sizeof(struct tdb_used_record));
249                 layout->elem[layout->num_elems-1].base.off = off;
250                 off += zone_left;
251         }
252
253         mem = malloc(off+1);
254         /* Now populate our header, cribbing from a real TDB header. */
255         tdb = tdb_open(NULL, TDB_INTERNAL, O_RDWR, 0, &tap_log_attr);
256         memcpy(mem, tdb->map_ptr, sizeof(struct tdb_header));
257
258         /* Mug the tdb we have to make it use this. */
259         free(tdb->map_ptr);
260         tdb->map_ptr = mem;
261         tdb->map_size = off+1;
262
263         for (i = 0; i < layout->num_elems; i++) {
264                 union tdb_layout_elem *e = &layout->elem[i];
265                 switch (e->base.type) {
266                 case ZONE:
267                         set_zone(mem + e->base.off, tdb, &e->zone);
268                         last_zone = &e->zone;
269                         break;
270                 case FREE:
271                         set_free_record(mem + e->base.off, e->free.len);
272                         break;
273                 case DATA:
274                         set_data_record(mem + e->base.off, tdb, last_zone,
275                                         &e->used);
276                         break;
277                 case HASHTABLE:
278                         set_hashtable(mem + e->base.off, tdb, last_zone,
279                                       &e->hashtable);
280                         break;
281                 }
282         }
283
284         /* Now fill the free and hash tables. */
285         for (i = 0; i < layout->num_elems; i++) {
286                 union tdb_layout_elem *e = &layout->elem[i];
287                 switch (e->base.type) {
288                 case ZONE:
289                         last_zone = &e->zone;
290                         break;
291                 case FREE:
292                         add_to_freetable(tdb, last_zone,
293                                          e->base.off, e->free.len);
294                         break;
295                 case DATA:
296                         add_to_hashtable(tdb, e->base.off, e->used.key);
297                         break;
298                 default:
299                         break;
300                 }
301         }
302
303         /* Write tailer. */
304         ((uint8_t *)tdb->map_ptr)[tdb->map_size-1] = last_zone->zone_bits;
305         return tdb;
306 }