tdb2: allow multiple chain locks.
[ccan] / ccan / tdb2 / test / layout.c
1 /* TDB tools to create various canned database layouts. */
2 #include "layout.h"
3 #include <stdlib.h>
4 #include <string.h>
5 #include <assert.h>
6 #include <err.h>
7 #include "logging.h"
8
9 struct tdb_layout *new_tdb_layout(const char *filename)
10 {
11         struct tdb_layout *layout = malloc(sizeof(*layout));
12         layout->filename = filename;
13         layout->num_elems = 0;
14         layout->elem = NULL;
15         return layout;
16 }
17
18 static void add(struct tdb_layout *layout, union tdb_layout_elem elem)
19 {
20         layout->elem = realloc(layout->elem,
21                                sizeof(layout->elem[0])
22                                * (layout->num_elems+1));
23         layout->elem[layout->num_elems++] = elem;
24 }
25
26 void tdb_layout_add_freetable(struct tdb_layout *layout)
27 {
28         union tdb_layout_elem elem;
29         elem.base.type = FREETABLE;
30         add(layout, elem);
31 }
32
33 void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len,
34                          unsigned ftable)
35 {
36         union tdb_layout_elem elem;
37         elem.base.type = FREE;
38         elem.free.len = len;
39         elem.free.ftable_num = ftable;
40         add(layout, elem);
41 }
42
43 static struct tdb_data dup_key(struct tdb_data key)
44 {
45         struct tdb_data ret;
46         ret.dsize = key.dsize;
47         ret.dptr = malloc(ret.dsize);
48         memcpy(ret.dptr, key.dptr, ret.dsize);
49         return ret;
50 }
51
52 void tdb_layout_add_used(struct tdb_layout *layout,
53                          TDB_DATA key, TDB_DATA data,
54                          tdb_len_t extra)
55 {
56         union tdb_layout_elem elem;
57         elem.base.type = DATA;
58         elem.used.key = dup_key(key);
59         elem.used.data = dup_key(data);
60         elem.used.extra = extra;
61         add(layout, elem);
62 }
63
64 static tdb_len_t free_record_len(tdb_len_t len)
65 {
66         return sizeof(struct tdb_used_record) + len;
67 }
68
69 static tdb_len_t data_record_len(struct tle_used *used)
70 {
71         tdb_len_t len;
72         len = sizeof(struct tdb_used_record)
73                 + used->key.dsize + used->data.dsize + used->extra;
74         assert(len >= sizeof(struct tdb_free_record));
75         return len;
76 }
77
78 static tdb_len_t hashtable_len(struct tle_hashtable *htable)
79 {
80         return sizeof(struct tdb_used_record)
81                 + (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
82                 + htable->extra;
83 }
84
85 static tdb_len_t freetable_len(struct tle_freetable *ftable)
86 {
87         return sizeof(struct tdb_freetable);
88 }
89
90 static void set_free_record(void *mem, tdb_len_t len)
91 {
92         /* We do all the work in add_to_freetable */
93 }
94
95 static void add_zero_pad(struct tdb_used_record *u, size_t len, size_t extra)
96 {
97         if (extra)
98                 ((char *)(u + 1))[len] = '\0';
99 }
100
101 static void set_data_record(void *mem, struct tdb_context *tdb,
102                             struct tle_used *used)
103 {
104         struct tdb_used_record *u = mem;
105
106         set_header(tdb, u, TDB_USED_MAGIC, used->key.dsize, used->data.dsize,
107                    used->key.dsize + used->data.dsize + used->extra,
108                    tdb_hash(tdb, used->key.dptr, used->key.dsize));
109         memcpy(u + 1, used->key.dptr, used->key.dsize);
110         memcpy((char *)(u + 1) + used->key.dsize,
111                used->data.dptr, used->data.dsize);
112         add_zero_pad(u, used->key.dsize + used->data.dsize, used->extra);
113 }
114
115 static void set_hashtable(void *mem, struct tdb_context *tdb,
116                           struct tle_hashtable *htable)
117 {
118         struct tdb_used_record *u = mem;
119         tdb_len_t len = sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS;
120
121         set_header(tdb, u, TDB_HTABLE_MAGIC, 0, len, len + htable->extra, 0);
122         memset(u + 1, 0, len);
123         add_zero_pad(u, len, htable->extra);
124 }
125
126 static void set_freetable(void *mem, struct tdb_context *tdb,
127                          struct tle_freetable *freetable, struct tdb_header *hdr,
128                          tdb_off_t last_ftable)
129 {
130         struct tdb_freetable *ftable = mem;
131         memset(ftable, 0, sizeof(*ftable));
132         set_header(tdb, &ftable->hdr, TDB_FTABLE_MAGIC, 0,
133                         sizeof(*ftable) - sizeof(ftable->hdr),
134                         sizeof(*ftable) - sizeof(ftable->hdr), 0);
135
136         if (last_ftable) {
137                 ftable = (struct tdb_freetable *)((char *)hdr + last_ftable);
138                 ftable->next = freetable->base.off;
139         } else {
140                 hdr->free_table = freetable->base.off;
141         }
142 }
143
144 static void add_to_freetable(struct tdb_context *tdb,
145                              tdb_off_t eoff,
146                              tdb_off_t elen,
147                              unsigned ftable,
148                              struct tle_freetable *freetable)
149 {
150         tdb->ftable_off = freetable->base.off;
151         tdb->ftable = ftable;
152         add_free_record(tdb, eoff, sizeof(struct tdb_used_record) + elen);
153 }
154
155 static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned ingroup)
156 {
157         return group_start
158                 + (ingroup % (1 << TDB_HASH_GROUP_BITS)) * sizeof(tdb_off_t);
159 }
160
/* Get bits from a value.
 *
 * Returns num bits of val, starting at bit position start (bit 0 is the
 * least significant), right-aligned in the result.  num may be anything
 * from 0 to 32 inclusive; positions at or beyond bit 64 read as zero. */
static uint32_t bits(uint64_t val, unsigned start, unsigned num)
{
        assert(num <= 32);

        /* Shifting a 64-bit value by 64 or more is undefined; there is
         * nothing up there anyway. */
        if (start >= 64)
                return 0;

        /* Build the mask in 64 bits: with a 32-bit unsigned, 1U << 32
         * would be undefined even though num == 32 is allowed above. */
        return (uint32_t)((val >> start) & ((UINT64_C(1) << num) - 1));
}
167
/* We take bits from the top: that way we can lock whole sections of the hash
 * by using lock ranges.
 *
 * Advances *used by num and returns the num bits of h that sit just below
 * the bits already consumed. */
static uint32_t use_bits(uint64_t h, unsigned num, unsigned *used)
{
        unsigned consumed = *used + num;

        *used = consumed;
        return bits(h, 64 - consumed, num);
}
175
176 static tdb_off_t encode_offset(tdb_off_t new_off, unsigned bucket,
177                                uint64_t h)
178 {
179         return bucket
180                 | new_off
181                 | ((uint64_t)bits(h, 64 - TDB_OFF_UPPER_STEAL_EXTRA,
182                                   TDB_OFF_UPPER_STEAL_EXTRA)
183                    << TDB_OFF_HASH_EXTRA_BIT);
184 }
185
186 /* FIXME: Our hash table handling here is primitive: we don't expand! */
187 static void add_to_hashtable(struct tdb_context *tdb,
188                              tdb_off_t eoff,
189                              struct tdb_data key)
190 {
191         uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
192         tdb_off_t b_off, group_start;
193         unsigned i, group, in_group;
194         unsigned used = 0;
195
196         group = use_bits(h, TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS, &used);
197         in_group = use_bits(h, TDB_HASH_GROUP_BITS, &used);
198
199         group_start = offsetof(struct tdb_header, hashtable)
200                 + group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
201
202         for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
203                 unsigned bucket = (in_group + i) % (1 << TDB_HASH_GROUP_BITS);
204
205                 b_off = hbucket_off(group_start, bucket);               
206                 if (tdb_read_off(tdb, b_off) == 0) {
207                         tdb_write_off(tdb, b_off,
208                                       encode_offset(eoff, bucket, h));
209                         return;
210                 }
211         }
212         abort();
213 }
214
215 static struct tle_freetable *find_ftable(struct tdb_layout *layout, unsigned num)
216 {
217         unsigned i;
218
219         for (i = 0; i < layout->num_elems; i++) {
220                 if (layout->elem[i].base.type != FREETABLE)
221                         continue;
222                 if (num == 0)
223                         return &layout->elem[i].ftable;
224                 num--;
225         }
226         abort();
227 }
228
229 /* FIXME: Support TDB_CONVERT */
230 struct tdb_context *tdb_layout_get(struct tdb_layout *layout)
231 {
232         unsigned int i;
233         tdb_off_t off, len, last_ftable;
234         char *mem;
235         struct tdb_context *tdb;
236
237         off = sizeof(struct tdb_header);
238
239         /* First pass of layout: calc lengths */
240         for (i = 0; i < layout->num_elems; i++) {
241                 union tdb_layout_elem *e = &layout->elem[i];
242                 e->base.off = off;
243                 switch (e->base.type) {
244                 case FREETABLE:
245                         len = freetable_len(&e->ftable);
246                         break;
247                 case FREE:
248                         len = free_record_len(e->free.len);
249                         break;
250                 case DATA:
251                         len = data_record_len(&e->used);
252                         break;
253                 case HASHTABLE:
254                         len = hashtable_len(&e->hashtable);
255                         break;
256                 default:
257                         abort();
258                 }
259                 off += len;
260         }
261
262         mem = malloc(off);
263         /* Fill with some weird pattern. */
264         memset(mem, 0x99, off);
265         /* Now populate our header, cribbing from a real TDB header. */
266         tdb = tdb_open(NULL, TDB_INTERNAL, O_RDWR, 0, &tap_log_attr);
267         memcpy(mem, tdb->file->map_ptr, sizeof(struct tdb_header));
268
269         /* Mug the tdb we have to make it use this. */
270         free(tdb->file->map_ptr);
271         tdb->file->map_ptr = mem;
272         tdb->file->map_size = off;
273
274         last_ftable = 0;
275         for (i = 0; i < layout->num_elems; i++) {
276                 union tdb_layout_elem *e = &layout->elem[i];
277                 switch (e->base.type) {
278                 case FREETABLE:
279                         set_freetable(mem + e->base.off, tdb, &e->ftable,
280                                      (struct tdb_header *)mem, last_ftable);
281                         last_ftable = e->base.off;
282                         break;
283                 case FREE:
284                         set_free_record(mem + e->base.off, e->free.len);
285                         break;
286                 case DATA:
287                         set_data_record(mem + e->base.off, tdb, &e->used);
288                         break;
289                 case HASHTABLE:
290                         set_hashtable(mem + e->base.off, tdb, &e->hashtable);
291                         break;
292                 }
293         }
294         /* Must have a free table! */
295         assert(last_ftable);
296
297         /* Now fill the free and hash tables. */
298         for (i = 0; i < layout->num_elems; i++) {
299                 union tdb_layout_elem *e = &layout->elem[i];
300                 switch (e->base.type) {
301                 case FREE:
302                         add_to_freetable(tdb, e->base.off, e->free.len,
303                                          e->free.ftable_num,
304                                          find_ftable(layout, e->free.ftable_num));
305                         break;
306                 case DATA:
307                         add_to_hashtable(tdb, e->base.off, e->used.key);
308                         break;
309                 default:
310                         break;
311                 }
312         }
313
314         tdb->ftable_off = find_ftable(layout, 0)->base.off;
315
316         /* Get physical if they asked for it. */
317         if (layout->filename) {
318                 int fd = open(layout->filename, O_WRONLY|O_TRUNC|O_CREAT,
319                               0600);
320                 if (fd < 0)
321                         err(1, "opening %s for writing", layout->filename);
322                 if (write(fd, tdb->file->map_ptr, tdb->file->map_size)
323                     != tdb->file->map_size)
324                         err(1, "writing %s", layout->filename);
325                 close(fd);
326                 tdb_close(tdb);
327                 /* NOMMAP is for lockcheck. */
328                 tdb = tdb_open(layout->filename, TDB_NOMMAP, O_RDWR, 0,
329                                &tap_log_attr);
330         }
331
332         return tdb;
333 }
334
335 void tdb_layout_free(struct tdb_layout *layout)
336 {
337         unsigned int i;
338
339         for (i = 0; i < layout->num_elems; i++) {
340                 if (layout->elem[i].base.type == DATA) {
341                         free(layout->elem[i].used.key.dptr);
342                         free(layout->elem[i].used.data.dptr);
343                 }
344         }
345         free(layout->elem);
346         free(layout);
347 }