tdb2: don't start again when we coalesce a record.
[ccan] / ccan / tdb2 / test / layout.c
1 /* TDB tools to create various canned database layouts. */
2 #include "layout.h"
3 #include <stdlib.h>
4 #include <string.h>
5 #include <assert.h>
6 #include <err.h>
7 #include "logging.h"
8
9 struct tdb_layout *new_tdb_layout(const char *filename)
10 {
11         struct tdb_layout *layout = malloc(sizeof(*layout));
12         layout->filename = filename;
13         layout->num_elems = 0;
14         layout->elem = NULL;
15         return layout;
16 }
17
18 static void add(struct tdb_layout *layout, union tdb_layout_elem elem)
19 {
20         layout->elem = realloc(layout->elem,
21                                sizeof(layout->elem[0])
22                                * (layout->num_elems+1));
23         layout->elem[layout->num_elems++] = elem;
24 }
25
26 void tdb_layout_add_freetable(struct tdb_layout *layout)
27 {
28         union tdb_layout_elem elem;
29         elem.base.type = FREETABLE;
30         add(layout, elem);
31 }
32
33 void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len,
34                          unsigned ftable)
35 {
36         union tdb_layout_elem elem;
37         elem.base.type = FREE;
38         elem.free.len = len;
39         elem.free.ftable_num = ftable;
40         add(layout, elem);
41 }
42
43 static struct tdb_data dup_key(struct tdb_data key)
44 {
45         struct tdb_data ret;
46         ret.dsize = key.dsize;
47         ret.dptr = malloc(ret.dsize);
48         memcpy(ret.dptr, key.dptr, ret.dsize);
49         return ret;
50 }
51
52 void tdb_layout_add_used(struct tdb_layout *layout,
53                          TDB_DATA key, TDB_DATA data,
54                          tdb_len_t extra)
55 {
56         union tdb_layout_elem elem;
57         elem.base.type = DATA;
58         elem.used.key = dup_key(key);
59         elem.used.data = dup_key(data);
60         elem.used.extra = extra;
61         add(layout, elem);
62 }
63
64 static tdb_len_t free_record_len(tdb_len_t len)
65 {
66         return sizeof(struct tdb_used_record) + len;
67 }
68
69 static tdb_len_t data_record_len(struct tle_used *used)
70 {
71         tdb_len_t len;
72         len = sizeof(struct tdb_used_record)
73                 + used->key.dsize + used->data.dsize + used->extra;
74         assert(len >= sizeof(struct tdb_free_record));
75         return len;
76 }
77
78 static tdb_len_t hashtable_len(struct tle_hashtable *htable)
79 {
80         return sizeof(struct tdb_used_record)
81                 + (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
82                 + htable->extra;
83 }
84
85 static tdb_len_t freetable_len(struct tle_freetable *ftable)
86 {
87         return sizeof(struct tdb_freetable);
88 }
89
90 static void set_free_record(void *mem, tdb_len_t len)
91 {
92         /* We do all the work in add_to_freetable */
93 }
94
95 static void add_zero_pad(struct tdb_used_record *u, size_t len, size_t extra)
96 {
97         if (extra)
98                 ((char *)(u + 1))[len] = '\0';
99 }
100
101 static void set_data_record(void *mem, struct tdb_context *tdb,
102                             struct tle_used *used)
103 {
104         struct tdb_used_record *u = mem;
105
106         set_header(tdb, u, TDB_USED_MAGIC, used->key.dsize, used->data.dsize,
107                    used->key.dsize + used->data.dsize + used->extra,
108                    tdb_hash(tdb, used->key.dptr, used->key.dsize));
109         memcpy(u + 1, used->key.dptr, used->key.dsize);
110         memcpy((char *)(u + 1) + used->key.dsize,
111                used->data.dptr, used->data.dsize);
112         add_zero_pad(u, used->key.dsize + used->data.dsize, used->extra);
113 }
114
115 static void set_hashtable(void *mem, struct tdb_context *tdb,
116                           struct tle_hashtable *htable)
117 {
118         struct tdb_used_record *u = mem;
119         tdb_len_t len = sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS;
120
121         set_header(tdb, u, TDB_HTABLE_MAGIC, 0, len, len + htable->extra, 0);
122         memset(u + 1, 0, len);
123         add_zero_pad(u, len, htable->extra);
124 }
125
126 static void set_freetable(void *mem, struct tdb_context *tdb,
127                          struct tle_freetable *freetable, struct tdb_header *hdr,
128                          tdb_off_t last_ftable)
129 {
130         struct tdb_freetable *ftable = mem;
131         memset(ftable, 0, sizeof(*ftable));
132         set_header(tdb, &ftable->hdr, TDB_FTABLE_MAGIC, 0,
133                         sizeof(*ftable) - sizeof(ftable->hdr),
134                         sizeof(*ftable) - sizeof(ftable->hdr), 0);
135
136         if (last_ftable) {
137                 ftable = (struct tdb_freetable *)((char *)hdr + last_ftable);
138                 ftable->next = freetable->base.off;
139         } else {
140                 hdr->free_table = freetable->base.off;
141         }
142 }
143
144 static void add_to_freetable(struct tdb_context *tdb,
145                              tdb_off_t eoff,
146                              tdb_off_t elen,
147                              unsigned ftable,
148                              struct tle_freetable *freetable)
149 {
150         tdb->ftable_off = freetable->base.off;
151         tdb->ftable = ftable;
152         add_free_record(tdb, eoff, sizeof(struct tdb_used_record) + elen,
153                         TDB_LOCK_WAIT);
154 }
155
156 static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned ingroup)
157 {
158         return group_start
159                 + (ingroup % (1 << TDB_HASH_GROUP_BITS)) * sizeof(tdb_off_t);
160 }
161
/* Get bits from a value.
 *
 * Returns the @num bits of @val starting at bit @start (bit 0 is the
 * least significant).  @num may be 0..32 and @start must be below 64. */
static uint32_t bits(uint64_t val, unsigned start, unsigned num)
{
        assert(num <= 32);
        /* Shifting a 64-bit value by 64 or more is undefined. */
        assert(start < 64);

        /* Build the mask in 64 bits: 1U << 32 would be undefined, and
         * num == 32 is explicitly allowed. */
        return (uint32_t)((val >> start) & (((uint64_t)1 << num) - 1));
}
168
/* Consume the next @num hash bits, working down from the top of @h:
 * taking bits from the top lets whole sections of the hash be locked
 * with lock ranges.  *@used is the running count of bits consumed so
 * far and is advanced by @num. */
static uint32_t use_bits(uint64_t h, unsigned num, unsigned *used)
{
        const unsigned consumed = *used + num;

        *used = consumed;
        return bits(h, 64 - consumed, num);
}
176
177 static tdb_off_t encode_offset(tdb_off_t new_off, unsigned bucket,
178                                uint64_t h)
179 {
180         return bucket
181                 | new_off
182                 | ((uint64_t)bits(h, 64 - TDB_OFF_UPPER_STEAL_EXTRA,
183                                   TDB_OFF_UPPER_STEAL_EXTRA)
184                    << TDB_OFF_HASH_EXTRA_BIT);
185 }
186
187 /* FIXME: Our hash table handling here is primitive: we don't expand! */
188 static void add_to_hashtable(struct tdb_context *tdb,
189                              tdb_off_t eoff,
190                              struct tdb_data key)
191 {
192         uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
193         tdb_off_t b_off, group_start;
194         unsigned i, group, in_group;
195         unsigned used = 0;
196
197         group = use_bits(h, TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS, &used);
198         in_group = use_bits(h, TDB_HASH_GROUP_BITS, &used);
199
200         group_start = offsetof(struct tdb_header, hashtable)
201                 + group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
202
203         for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
204                 unsigned bucket = (in_group + i) % (1 << TDB_HASH_GROUP_BITS);
205
206                 b_off = hbucket_off(group_start, bucket);               
207                 if (tdb_read_off(tdb, b_off) == 0) {
208                         tdb_write_off(tdb, b_off,
209                                       encode_offset(eoff, bucket, h));
210                         return;
211                 }
212         }
213         abort();
214 }
215
216 static struct tle_freetable *find_ftable(struct tdb_layout *layout, unsigned num)
217 {
218         unsigned i;
219
220         for (i = 0; i < layout->num_elems; i++) {
221                 if (layout->elem[i].base.type != FREETABLE)
222                         continue;
223                 if (num == 0)
224                         return &layout->elem[i].ftable;
225                 num--;
226         }
227         abort();
228 }
229
230 /* FIXME: Support TDB_CONVERT */
231 struct tdb_context *tdb_layout_get(struct tdb_layout *layout)
232 {
233         unsigned int i;
234         tdb_off_t off, len, last_ftable;
235         char *mem;
236         struct tdb_context *tdb;
237
238         off = sizeof(struct tdb_header);
239
240         /* First pass of layout: calc lengths */
241         for (i = 0; i < layout->num_elems; i++) {
242                 union tdb_layout_elem *e = &layout->elem[i];
243                 e->base.off = off;
244                 switch (e->base.type) {
245                 case FREETABLE:
246                         len = freetable_len(&e->ftable);
247                         break;
248                 case FREE:
249                         len = free_record_len(e->free.len);
250                         break;
251                 case DATA:
252                         len = data_record_len(&e->used);
253                         break;
254                 case HASHTABLE:
255                         len = hashtable_len(&e->hashtable);
256                         break;
257                 default:
258                         abort();
259                 }
260                 off += len;
261         }
262
263         mem = malloc(off);
264         /* Fill with some weird pattern. */
265         memset(mem, 0x99, off);
266         /* Now populate our header, cribbing from a real TDB header. */
267         tdb = tdb_open(NULL, TDB_INTERNAL, O_RDWR, 0, &tap_log_attr);
268         memcpy(mem, tdb->file->map_ptr, sizeof(struct tdb_header));
269
270         /* Mug the tdb we have to make it use this. */
271         free(tdb->file->map_ptr);
272         tdb->file->map_ptr = mem;
273         tdb->file->map_size = off;
274
275         last_ftable = 0;
276         for (i = 0; i < layout->num_elems; i++) {
277                 union tdb_layout_elem *e = &layout->elem[i];
278                 switch (e->base.type) {
279                 case FREETABLE:
280                         set_freetable(mem + e->base.off, tdb, &e->ftable,
281                                      (struct tdb_header *)mem, last_ftable);
282                         last_ftable = e->base.off;
283                         break;
284                 case FREE:
285                         set_free_record(mem + e->base.off, e->free.len);
286                         break;
287                 case DATA:
288                         set_data_record(mem + e->base.off, tdb, &e->used);
289                         break;
290                 case HASHTABLE:
291                         set_hashtable(mem + e->base.off, tdb, &e->hashtable);
292                         break;
293                 }
294         }
295         /* Must have a free table! */
296         assert(last_ftable);
297
298         /* Now fill the free and hash tables. */
299         for (i = 0; i < layout->num_elems; i++) {
300                 union tdb_layout_elem *e = &layout->elem[i];
301                 switch (e->base.type) {
302                 case FREE:
303                         add_to_freetable(tdb, e->base.off, e->free.len,
304                                          e->free.ftable_num,
305                                          find_ftable(layout, e->free.ftable_num));
306                         break;
307                 case DATA:
308                         add_to_hashtable(tdb, e->base.off, e->used.key);
309                         break;
310                 default:
311                         break;
312                 }
313         }
314
315         tdb->ftable_off = find_ftable(layout, 0)->base.off;
316
317         /* Get physical if they asked for it. */
318         if (layout->filename) {
319                 int fd = open(layout->filename, O_WRONLY|O_TRUNC|O_CREAT,
320                               0600);
321                 if (fd < 0)
322                         err(1, "opening %s for writing", layout->filename);
323                 if (write(fd, tdb->file->map_ptr, tdb->file->map_size)
324                     != tdb->file->map_size)
325                         err(1, "writing %s", layout->filename);
326                 close(fd);
327                 tdb_close(tdb);
328                 /* NOMMAP is for lockcheck. */
329                 tdb = tdb_open(layout->filename, TDB_NOMMAP, O_RDWR, 0,
330                                &tap_log_attr);
331         }
332
333         return tdb;
334 }
335
336 void tdb_layout_free(struct tdb_layout *layout)
337 {
338         unsigned int i;
339
340         for (i = 0; i < layout->num_elems; i++) {
341                 if (layout->elem[i].base.type == DATA) {
342                         free(layout->elem[i].used.key.dptr);
343                         free(layout->elem[i].used.data.dptr);
344                 }
345         }
346         free(layout->elem);
347         free(layout);
348 }