]> git.ozlabs.org Git - ccan/blobdiff - ccan/tdb2/test/layout.c
tdb2: use counters to decide when to coalesce records.
[ccan] / ccan / tdb2 / test / layout.c
index 87718d31cba4db878550b3d1ecee230800cc79d1..6fcee6d482a581f35d4974f29de161b502b4682a 100644 (file)
@@ -3,14 +3,15 @@
 #include <stdlib.h>
 #include <string.h>
 #include <assert.h>
+#include <err.h>
 #include "logging.h"
 
-struct tdb_layout *new_tdb_layout(void)
+struct tdb_layout *new_tdb_layout(const char *filename)
 {
        struct tdb_layout *layout = malloc(sizeof(*layout));
+       layout->filename = filename;
        layout->num_elems = 0;
        layout->elem = NULL;
-       layout->ftable = layout->htable = -1;
        return layout;
 }
 
@@ -22,11 +23,20 @@ static void add(struct tdb_layout *layout, union tdb_layout_elem elem)
        layout->elem[layout->num_elems++] = elem;
 }
 
-void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len)
+void tdb_layout_add_freetable(struct tdb_layout *layout)
+{
+       union tdb_layout_elem elem;
+       elem.base.type = FREETABLE;
+       add(layout, elem);
+}
+
+void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len,
+                        unsigned ftable)
 {
        union tdb_layout_elem elem;
        elem.base.type = FREE;
        elem.free.len = len;
+       elem.free.ftable_num = ftable;
        add(layout, elem);
 }
 
@@ -51,36 +61,6 @@ void tdb_layout_add_used(struct tdb_layout *layout,
        add(layout, elem);
 }
 
-void tdb_layout_add_hashtable(struct tdb_layout *layout,
-                             unsigned int hash_bits,
-                             tdb_len_t extra)
-{
-       union tdb_layout_elem elem;
-       elem.base.type = HASHTABLE;
-       elem.hashtable.hash_bits = hash_bits;
-       elem.hashtable.extra = extra;
-       assert(layout->htable == -1U);
-       layout->htable = layout->num_elems;
-       add(layout, elem);
-}
-
-void tdb_layout_add_freetable(struct tdb_layout *layout,
-                             unsigned int num_zones,
-                             unsigned int zone_bits,
-                             unsigned int num_buckets,
-                             tdb_len_t extra)
-{
-       union tdb_layout_elem elem;
-       elem.base.type = FREETABLE;
-       elem.freetable.num_zones = num_zones;
-       elem.freetable.zone_bits = zone_bits;
-       elem.freetable.num_buckets = num_buckets;
-       elem.freetable.extra = extra;
-       assert(layout->ftable == -1U);
-       layout->ftable = layout->num_elems;
-       add(layout, elem);
-}
-
 static tdb_len_t free_record_len(tdb_len_t len)
 {
        return sizeof(struct tdb_used_record) + len;
@@ -98,14 +78,13 @@ static tdb_len_t data_record_len(struct tle_used *used)
 static tdb_len_t hashtable_len(struct tle_hashtable *htable)
 {
        return sizeof(struct tdb_used_record)
-               + (sizeof(tdb_off_t) << htable->hash_bits);
+               + (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
+               + htable->extra;
 }
 
 static tdb_len_t freetable_len(struct tle_freetable *ftable)
 {
-       return sizeof(struct tdb_used_record)
-               + (sizeof(tdb_off_t) * ftable->num_zones
-                  * (ftable->num_buckets + 1));
+       return sizeof(struct tdb_freetable);
 }
 
 static void set_free_record(void *mem, tdb_len_t len)
@@ -113,118 +92,195 @@ static void set_free_record(void *mem, tdb_len_t len)
        /* We do all the work in add_to_freetable */
 }
 
+static void add_zero_pad(struct tdb_used_record *u, size_t len, size_t extra)
+{
+       if (extra)
+               ((char *)(u + 1))[len] = '\0';
+}
+
 static void set_data_record(void *mem, struct tdb_context *tdb,
                            struct tle_used *used)
 {
        struct tdb_used_record *u = mem;
 
-       set_header(tdb, u, used->key.dsize, used->data.dsize,
+       set_header(tdb, u, TDB_USED_MAGIC, used->key.dsize, used->data.dsize,
                   used->key.dsize + used->data.dsize + used->extra,
                   tdb_hash(tdb, used->key.dptr, used->key.dsize));
        memcpy(u + 1, used->key.dptr, used->key.dsize);
        memcpy((char *)(u + 1) + used->key.dsize,
               used->data.dptr, used->data.dsize);
+       add_zero_pad(u, used->key.dsize + used->data.dsize, used->extra);
 }
 
 static void set_hashtable(void *mem, struct tdb_context *tdb,
                          struct tle_hashtable *htable)
 {
        struct tdb_used_record *u = mem;
-       tdb_len_t len = sizeof(tdb_off_t) << htable->hash_bits;
+       tdb_len_t len = sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS;
 
-       set_header(tdb, u, 0, len, len + htable->extra, 0);
+       set_header(tdb, u, TDB_HTABLE_MAGIC, 0, len, len + htable->extra, 0);
        memset(u + 1, 0, len);
+       add_zero_pad(u, len, htable->extra);
 }
 
 static void set_freetable(void *mem, struct tdb_context *tdb,
-                         struct tle_freetable *ftable)
+                        struct tle_freetable *freetable, struct tdb_header *hdr,
+                        tdb_off_t last_ftable)
 {
-       struct tdb_used_record *u = mem;
-       tdb_len_t len = sizeof(tdb_off_t) * ftable->num_zones
-               * (ftable->num_buckets + 1);
-       set_header(tdb, u, 0, len, len + ftable->extra, 0);
-       memset(u + 1, 0, len);
+       struct tdb_freetable *ftable = mem;
+       memset(ftable, 0, sizeof(*ftable));
+       set_header(tdb, &ftable->hdr, TDB_FTABLE_MAGIC, 0,
+                       sizeof(*ftable) - sizeof(ftable->hdr),
+                       sizeof(*ftable) - sizeof(ftable->hdr), 0);
+
+       if (last_ftable) {
+               ftable = (struct tdb_freetable *)((char *)hdr + last_ftable);
+               ftable->next = freetable->base.off;
+       } else {
+               hdr->free_table = freetable->base.off;
+       }
 }
 
 static void add_to_freetable(struct tdb_context *tdb,
                             tdb_off_t eoff,
-                            tdb_off_t elen)
+                            tdb_off_t elen,
+                            unsigned ftable,
+                            struct tle_freetable *freetable)
+{
+       tdb->ftable_off = freetable->base.off;
+       tdb->ftable = ftable;
+       add_free_record(tdb, eoff, sizeof(struct tdb_used_record) + elen,
+                       TDB_LOCK_WAIT, false);
+}
+
+static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned ingroup)
+{
+       return group_start
+               + (ingroup % (1 << TDB_HASH_GROUP_BITS)) * sizeof(tdb_off_t);
+}
+
+/* Get bits from a value. */
+static uint32_t bits(uint64_t val, unsigned start, unsigned num)
+{
+       assert(num <= 32);
+       return (val >> start) & ((1U << num) - 1);
+}
+
+/* We take bits from the top: that way we can lock whole sections of the hash
+ * by using lock ranges. */
+static uint32_t use_bits(uint64_t h, unsigned num, unsigned *used)
 {
-       add_free_record(tdb, eoff, sizeof(struct tdb_used_record) + elen);
+       *used += num;
+       return bits(h, 64 - *used, num);
 }
 
+static tdb_off_t encode_offset(tdb_off_t new_off, unsigned bucket,
+                              uint64_t h)
+{
+       return bucket
+               | new_off
+               | ((uint64_t)bits(h, 64 - TDB_OFF_UPPER_STEAL_EXTRA,
+                                 TDB_OFF_UPPER_STEAL_EXTRA)
+                  << TDB_OFF_HASH_EXTRA_BIT);
+}
+
+/* FIXME: Our hash table handling here is primitive: we don't expand! */
 static void add_to_hashtable(struct tdb_context *tdb,
                             tdb_off_t eoff,
                             struct tdb_data key)
 {
-       uint64_t hash = tdb_hash(tdb, key.dptr, key.dsize);
-       tdb_off_t hoff;
+       uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
+       tdb_off_t b_off, group_start;
+       unsigned i, group, in_group;
+       unsigned used = 0;
+
+       group = use_bits(h, TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS, &used);
+       in_group = use_bits(h, TDB_HASH_GROUP_BITS, &used);
+
+       group_start = offsetof(struct tdb_header, hashtable)
+               + group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
 
-       while (tdb_read_off(tdb, hoff = hash_off(tdb, hash)) != 0)
-               hash++;
+       for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
+               unsigned bucket = (in_group + i) % (1 << TDB_HASH_GROUP_BITS);
 
-       tdb_write_off(tdb, hoff, eoff);
+               b_off = hbucket_off(group_start, bucket);               
+               if (tdb_read_off(tdb, b_off) == 0) {
+                       tdb_write_off(tdb, b_off,
+                                     encode_offset(eoff, bucket, h));
+                       return;
+               }
+       }
+       abort();
+}
+
+static struct tle_freetable *find_ftable(struct tdb_layout *layout, unsigned num)
+{
+       unsigned i;
+
+       for (i = 0; i < layout->num_elems; i++) {
+               if (layout->elem[i].base.type != FREETABLE)
+                       continue;
+               if (num == 0)
+                       return &layout->elem[i].ftable;
+               num--;
+       }
+       abort();
 }
 
 /* FIXME: Support TDB_CONVERT */
 struct tdb_context *tdb_layout_get(struct tdb_layout *layout)
 {
        unsigned int i;
-       tdb_off_t len;
-       struct tdb_header *hdr;
+       tdb_off_t off, len, last_ftable;
        char *mem;
        struct tdb_context *tdb;
 
-       assert(layout->ftable != -1U);
-       assert(layout->htable != -1U);
-
-       len = sizeof(struct tdb_header);
+       off = sizeof(struct tdb_header);
 
        /* First pass of layout: calc lengths */
        for (i = 0; i < layout->num_elems; i++) {
                union tdb_layout_elem *e = &layout->elem[i];
-               e->base.off = len;
+               e->base.off = off;
                switch (e->base.type) {
+               case FREETABLE:
+                       len = freetable_len(&e->ftable);
+                       break;
                case FREE:
-                       len += free_record_len(e->free.len);
+                       len = free_record_len(e->free.len);
                        break;
                case DATA:
-                       len += data_record_len(&e->used);
+                       len = data_record_len(&e->used);
                        break;
                case HASHTABLE:
-                       len += hashtable_len(&e->hashtable);
-                       break;
-               case FREETABLE:
-                       len += freetable_len(&e->freetable);
+                       len = hashtable_len(&e->hashtable);
                        break;
+               default:
+                       abort();
                }
+               off += len;
        }
 
-       mem = malloc(len);
+       mem = malloc(off);
+       /* Fill with some weird pattern. */
+       memset(mem, 0x99, off);
        /* Now populate our header, cribbing from a real TDB header. */
        tdb = tdb_open(NULL, TDB_INTERNAL, O_RDWR, 0, &tap_log_attr);
-       hdr = (void *)mem;
-       *hdr = tdb->header;
-       hdr->v.generation++;
-       hdr->v.num_zones = layout->elem[layout->ftable].freetable.num_zones;
-       hdr->v.zone_bits = layout->elem[layout->ftable].freetable.zone_bits;
-       hdr->v.free_buckets
-               = layout->elem[layout->ftable].freetable.num_buckets;
-       hdr->v.free_off = layout->elem[layout->ftable].base.off
-               + sizeof(struct tdb_used_record);
-       hdr->v.hash_bits = layout->elem[layout->htable].hashtable.hash_bits;
-       hdr->v.hash_off = layout->elem[layout->htable].base.off
-               + sizeof(struct tdb_used_record);
+       memcpy(mem, tdb->file->map_ptr, sizeof(struct tdb_header));
 
        /* Mug the tdb we have to make it use this. */
-       free(tdb->map_ptr);
-       tdb->map_ptr = mem;
-       tdb->map_size = len;
-       header_changed(tdb);
+       free(tdb->file->map_ptr);
+       tdb->file->map_ptr = mem;
+       tdb->file->map_size = off;
 
+       last_ftable = 0;
        for (i = 0; i < layout->num_elems; i++) {
                union tdb_layout_elem *e = &layout->elem[i];
                switch (e->base.type) {
+               case FREETABLE:
+                       set_freetable(mem + e->base.off, tdb, &e->ftable,
+                                    (struct tdb_header *)mem, last_ftable);
+                       last_ftable = e->base.off;
+                       break;
                case FREE:
                        set_free_record(mem + e->base.off, e->free.len);
                        break;
@@ -234,18 +290,19 @@ struct tdb_context *tdb_layout_get(struct tdb_layout *layout)
                case HASHTABLE:
                        set_hashtable(mem + e->base.off, tdb, &e->hashtable);
                        break;
-               case FREETABLE:
-                       set_freetable(mem + e->base.off, tdb, &e->freetable);
-                       break;
                }
        }
+       /* Must have a free table! */
+       assert(last_ftable);
 
        /* Now fill the free and hash tables. */
        for (i = 0; i < layout->num_elems; i++) {
                union tdb_layout_elem *e = &layout->elem[i];
                switch (e->base.type) {
                case FREE:
-                       add_to_freetable(tdb, e->base.off, e->free.len);
+                       add_to_freetable(tdb, e->base.off, e->free.len,
+                                        e->free.ftable_num,
+                                        find_ftable(layout, e->free.ftable_num));
                        break;
                case DATA:
                        add_to_hashtable(tdb, e->base.off, e->used.key);
@@ -255,5 +312,37 @@ struct tdb_context *tdb_layout_get(struct tdb_layout *layout)
                }
        }
 
+       tdb->ftable_off = find_ftable(layout, 0)->base.off;
+
+       /* Get physical if they asked for it. */
+       if (layout->filename) {
+               int fd = open(layout->filename, O_WRONLY|O_TRUNC|O_CREAT,
+                             0600);
+               if (fd < 0)
+                       err(1, "opening %s for writing", layout->filename);
+               if (write(fd, tdb->file->map_ptr, tdb->file->map_size)
+                   != tdb->file->map_size)
+                       err(1, "writing %s", layout->filename);
+               close(fd);
+               tdb_close(tdb);
+               /* NOMMAP is for lockcheck. */
+               tdb = tdb_open(layout->filename, TDB_NOMMAP, O_RDWR, 0,
+                              &tap_log_attr);
+       }
+
        return tdb;
 }
+
+void tdb_layout_free(struct tdb_layout *layout)
+{
+       unsigned int i;
+
+       for (i = 0; i < layout->num_elems; i++) {
+               if (layout->elem[i].base.type == DATA) {
+                       free(layout->elem[i].used.key.dptr);
+                       free(layout->elem[i].used.data.dptr);
+               }
+       }
+       free(layout->elem);
+       free(layout);
+}