tdb2: get rid of zones
[ccan] / ccan / tdb2 / test / layout.c
1 /* TDB tools to create various canned database layouts. */
2 #include "layout.h"
3 #include <stdlib.h>
4 #include <string.h>
5 #include <assert.h>
6 #include <err.h>
7 #include "logging.h"
8
9 struct tdb_layout *new_tdb_layout(const char *filename)
10 {
11         struct tdb_layout *layout = malloc(sizeof(*layout));
12         layout->filename = filename;
13         layout->num_elems = 0;
14         layout->elem = NULL;
15         return layout;
16 }
17
18 static void add(struct tdb_layout *layout, union tdb_layout_elem elem)
19 {
20         layout->elem = realloc(layout->elem,
21                                sizeof(layout->elem[0])
22                                * (layout->num_elems+1));
23         layout->elem[layout->num_elems++] = elem;
24 }
25
26 void tdb_layout_add_freelist(struct tdb_layout *layout)
27 {
28         union tdb_layout_elem elem;
29         elem.base.type = FREELIST;
30         add(layout, elem);
31 }
32
33 void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len)
34 {
35         union tdb_layout_elem elem;
36         elem.base.type = FREE;
37         elem.free.len = len;
38         add(layout, elem);
39 }
40
41 static struct tdb_data dup_key(struct tdb_data key)
42 {
43         struct tdb_data ret;
44         ret.dsize = key.dsize;
45         ret.dptr = malloc(ret.dsize);
46         memcpy(ret.dptr, key.dptr, ret.dsize);
47         return ret;
48 }
49
50 void tdb_layout_add_used(struct tdb_layout *layout,
51                          TDB_DATA key, TDB_DATA data,
52                          tdb_len_t extra)
53 {
54         union tdb_layout_elem elem;
55         elem.base.type = DATA;
56         elem.used.key = dup_key(key);
57         elem.used.data = dup_key(data);
58         elem.used.extra = extra;
59         add(layout, elem);
60 }
61
62 static tdb_len_t free_record_len(tdb_len_t len)
63 {
64         return sizeof(struct tdb_used_record) + len;
65 }
66
67 static tdb_len_t data_record_len(struct tle_used *used)
68 {
69         tdb_len_t len;
70         len = sizeof(struct tdb_used_record)
71                 + used->key.dsize + used->data.dsize + used->extra;
72         assert(len >= sizeof(struct tdb_free_record));
73         return len;
74 }
75
76 static tdb_len_t hashtable_len(struct tle_hashtable *htable)
77 {
78         return sizeof(struct tdb_used_record)
79                 + (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
80                 + htable->extra;
81 }
82
83 static tdb_len_t freelist_len(struct tle_freelist *flist)
84 {
85         return sizeof(struct tdb_freelist);
86 }
87
88 static void set_free_record(void *mem, tdb_len_t len)
89 {
90         /* We do all the work in add_to_freetable */
91 }
92
93 static void set_data_record(void *mem, struct tdb_context *tdb,
94                             struct tle_used *used)
95 {
96         struct tdb_used_record *u = mem;
97
98         set_header(tdb, u, used->key.dsize, used->data.dsize,
99                    used->key.dsize + used->data.dsize + used->extra,
100                    tdb_hash(tdb, used->key.dptr, used->key.dsize));
101         memcpy(u + 1, used->key.dptr, used->key.dsize);
102         memcpy((char *)(u + 1) + used->key.dsize,
103                used->data.dptr, used->data.dsize);
104 }
105
106 static void set_hashtable(void *mem, struct tdb_context *tdb,
107                           struct tle_hashtable *htable)
108 {
109         struct tdb_used_record *u = mem;
110         tdb_len_t len = sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS;
111
112         set_header(tdb, u, 0, len, len + htable->extra, 0);
113         memset(u + 1, 0, len);
114 }
115
116 static void set_freelist(void *mem, struct tdb_context *tdb,
117                          struct tle_freelist *freelist)
118 {
119         struct tdb_freelist *flist = mem;
120         memset(flist, 0, sizeof(*flist));
121         set_header(tdb, &flist->hdr, 0,
122                    sizeof(*flist) - sizeof(flist->hdr),
123                    sizeof(*flist) - sizeof(flist->hdr), 1);
124 }
125
126 static void add_to_freetable(struct tdb_context *tdb,
127                              tdb_off_t eoff,
128                              tdb_off_t elen)
129 {
130         add_free_record(tdb, eoff, sizeof(struct tdb_used_record) + elen);
131 }
132
133 static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned ingroup)
134 {
135         return group_start
136                 + (ingroup % (1 << TDB_HASH_GROUP_BITS)) * sizeof(tdb_off_t);
137 }
138
/*
 * Get bits from a value.
 *
 * Returns the num-bit field of val whose least significant bit is bit
 * number start.  num may be 0..32 and start must be below 64; both are
 * enforced with assert().
 */
static uint32_t bits(uint64_t val, unsigned start, unsigned num)
{
        assert(num <= 32);
        /* Shifting a 64-bit value by 64 or more is undefined. */
        assert(start < 64);
        /* Build the mask in 64 bits: (1U << 32) would shift a 32-bit value by
         * its full width, which is undefined, yet num == 32 is allowed. */
        return (uint32_t)((val >> start) & (((uint64_t)1 << num) - 1));
}
145
/*
 * We take bits from the top: that way we can lock whole sections of the hash
 * by using lock ranges.
 *
 * Consumes the next num bits of h, counting down from bit 63.  *used is the
 * number of bits already consumed and is advanced by num.
 */
static uint32_t use_bits(uint64_t h, unsigned num, unsigned *used)
{
        unsigned consumed = *used + num;

        *used = consumed;
        return bits(h, 64 - consumed, num);
}
153
154 static tdb_off_t encode_offset(tdb_off_t new_off, unsigned bucket,
155                                uint64_t h)
156 {
157         return bucket
158                 | new_off
159                 | ((uint64_t)bits(h, 64 - TDB_OFF_UPPER_STEAL_EXTRA,
160                                   TDB_OFF_UPPER_STEAL_EXTRA)
161                    << TDB_OFF_HASH_EXTRA_BIT);
162 }
163
164 /* FIXME: Our hash table handling here is primitive: we don't expand! */
165 static void add_to_hashtable(struct tdb_context *tdb,
166                              tdb_off_t eoff,
167                              struct tdb_data key)
168 {
169         uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
170         tdb_off_t b_off, group_start;
171         unsigned i, group, in_group;
172         unsigned used = 0;
173
174         group = use_bits(h, TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS, &used);
175         in_group = use_bits(h, TDB_HASH_GROUP_BITS, &used);
176
177         group_start = offsetof(struct tdb_header, hashtable)
178                 + group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
179
180         for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
181                 unsigned bucket = (in_group + i) % (1 << TDB_HASH_GROUP_BITS);
182
183                 b_off = hbucket_off(group_start, bucket);               
184                 if (tdb_read_off(tdb, b_off) == 0) {
185                         tdb_write_off(tdb, b_off,
186                                       encode_offset(eoff, bucket, h));
187                         return;
188                 }
189         }
190         abort();
191 }
192
193 /* FIXME: Support TDB_CONVERT */
194 struct tdb_context *tdb_layout_get(struct tdb_layout *layout)
195 {
196         unsigned int i;
197         tdb_off_t off, len, flist_off = 0;
198         char *mem;
199         struct tdb_context *tdb;
200
201         off = sizeof(struct tdb_header);
202
203         /* First pass of layout: calc lengths */
204         for (i = 0; i < layout->num_elems; i++) {
205                 union tdb_layout_elem *e = &layout->elem[i];
206                 e->base.off = off;
207                 switch (e->base.type) {
208                 case FREELIST:
209                         assert(flist_off == 0);
210                         flist_off = off;
211                         len = freelist_len(&e->flist);
212                         break;
213                 case FREE:
214                         len = free_record_len(e->free.len);
215                         break;
216                 case DATA:
217                         len = data_record_len(&e->used);
218                         break;
219                 case HASHTABLE:
220                         len = hashtable_len(&e->hashtable);
221                         break;
222                 default:
223                         abort();
224                 }
225                 off += len;
226         }
227         /* Must have a free list! */
228         assert(flist_off);
229
230         mem = malloc(off);
231         /* Now populate our header, cribbing from a real TDB header. */
232         tdb = tdb_open(NULL, TDB_INTERNAL, O_RDWR, 0, &tap_log_attr);
233         memcpy(mem, tdb->map_ptr, sizeof(struct tdb_header));
234
235         /* Mug the tdb we have to make it use this. */
236         free(tdb->map_ptr);
237         tdb->map_ptr = mem;
238         tdb->map_size = off;
239         tdb->flist_off = flist_off;
240
241         for (i = 0; i < layout->num_elems; i++) {
242                 union tdb_layout_elem *e = &layout->elem[i];
243                 switch (e->base.type) {
244                 case FREELIST:
245                         set_freelist(mem + e->base.off, tdb, &e->flist);
246                         break;
247                 case FREE:
248                         set_free_record(mem + e->base.off, e->free.len);
249                         break;
250                 case DATA:
251                         set_data_record(mem + e->base.off, tdb, &e->used);
252                         break;
253                 case HASHTABLE:
254                         set_hashtable(mem + e->base.off, tdb, &e->hashtable);
255                         break;
256                 }
257         }
258
259         /* Now fill the free and hash tables. */
260         for (i = 0; i < layout->num_elems; i++) {
261                 union tdb_layout_elem *e = &layout->elem[i];
262                 switch (e->base.type) {
263                 case FREE:
264                         add_to_freetable(tdb, e->base.off, e->free.len);
265                         break;
266                 case DATA:
267                         add_to_hashtable(tdb, e->base.off, e->used.key);
268                         break;
269                 default:
270                         break;
271                 }
272         }
273
274         /* Get physical if they asked for it. */
275         if (layout->filename) {
276                 int fd = open(layout->filename, O_WRONLY|O_TRUNC|O_CREAT,
277                               0600);
278                 if (fd < 0)
279                         err(1, "opening %s for writing", layout->filename);
280                 if (write(fd, tdb->map_ptr, tdb->map_size) != tdb->map_size)
281                         err(1, "writing %s", layout->filename);
282                 close(fd);
283                 tdb_close(tdb);
284                 /* NOMMAP is for lockcheck. */
285                 tdb = tdb_open(layout->filename, TDB_NOMMAP, O_RDWR, 0,
286                                &tap_log_attr);
287         }
288
289         return tdb;
290 }