#include <stdlib.h>
#include <string.h>
#include <assert.h>
+#include <err.h>
#include "logging.h"
struct tdb_layout *new_tdb_layout(void)
struct tdb_layout *layout = malloc(sizeof(*layout));
layout->num_elems = 0;
layout->elem = NULL;
- layout->htable = -1;
return layout;
}
layout->elem[layout->num_elems++] = elem;
}
-void tdb_layout_add_zone(struct tdb_layout *layout,
- unsigned int zone_bits,
- bool fill_prev)
+/* Build a FREETABLE element and hand it to add().  Only the type tag is
+ * filled in; a free table takes no further parameters here. */
+void tdb_layout_add_freetable(struct tdb_layout *layout)
{
union tdb_layout_elem elem;
- if (fill_prev)
- tdb_layout_add_free(layout, 0);
- elem.base.type = ZONE;
- elem.zone.zone_bits = zone_bits;
+ elem.base.type = FREETABLE;
add(layout, elem);
}
-void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len)
+/* Build a FREE element and hand it to add().
+ * @len:    stored in elem.free.len; free_record_len() adds
+ *          sizeof(struct tdb_used_record) on top when sizing the record.
+ * @ftable: stored in elem.free.ftable_num; tdb_layout_get() resolves it with
+ *          find_ftable(), which counts FREETABLE elements starting at 0. */
+void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len,
+ unsigned ftable)
{
union tdb_layout_elem elem;
elem.base.type = FREE;
elem.free.len = len;
+ elem.free.ftable_num = ftable;
+ add(layout, elem);
+}
+
+/* Build a CAPABILITY element and hand it to add().
+ * The stored capability type is @type with TDB_CAP_NOWRITE, TDB_CAP_NOCHECK
+ * and TDB_CAP_NOOPEN OR'ed in for each of @write_breaks, @check_breaks and
+ * @open_breaks that is true.  @extra is recorded as-is; capability_len()
+ * adds it to the fixed structure size. */
+void tdb_layout_add_capability(struct tdb_layout *layout,
+			       uint64_t type,
+			       bool write_breaks,
+			       bool check_breaks,
+			       bool open_breaks,
+			       tdb_len_t extra)
+{
+	union tdb_layout_elem elem;
+	uint64_t flagged_type = type;
+
+	elem.base.type = CAPABILITY;
+
+	/* Fold the three "breaks" booleans into their flag bits. */
+	if (write_breaks)
+		flagged_type |= TDB_CAP_NOWRITE;
+	if (check_breaks)
+		flagged_type |= TDB_CAP_NOCHECK;
+	if (open_breaks)
+		flagged_type |= TDB_CAP_NOOPEN;
+
+	elem.capability.type = flagged_type;
+	elem.capability.extra = extra;
add(layout, elem);
}
add(layout, elem);
}
-void tdb_layout_add_hashtable(struct tdb_layout *layout,
- unsigned int hash_bits,
- tdb_len_t extra)
-{
- union tdb_layout_elem elem;
- elem.base.type = HASHTABLE;
- elem.hashtable.hash_bits = hash_bits;
- elem.hashtable.extra = extra;
- assert(layout->htable == -1U);
- layout->htable = layout->num_elems;
- add(layout, elem);
-}
-
static tdb_len_t free_record_len(tdb_len_t len)
{
return sizeof(struct tdb_used_record) + len;
+/* Bytes a HASHTABLE element occupies: one used-record header, a table of
+ * (1 << TDB_SUBLEVEL_HASH_BITS) tdb_off_t slots, and the element's
+ * requested extra bytes. */
static tdb_len_t hashtable_len(struct tle_hashtable *htable)
{
return sizeof(struct tdb_used_record)
- + (sizeof(tdb_off_t) << htable->hash_bits);
+ + (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
+ + htable->extra;
+}
+
+/* Bytes a CAPABILITY element occupies: the fixed struct tdb_capability
+ * followed by cap->extra additional bytes. */
+static tdb_len_t capability_len(struct tle_capability *cap)
+{
+	tdb_len_t total = sizeof(struct tdb_capability);
+
+	total += cap->extra;
+	return total;
}
-static tdb_len_t zone_header_len(struct tle_zone *zone)
+/* Bytes a FREETABLE element occupies: always sizeof(struct tdb_freetable).
+ * @ftable is not consulted. */
+static tdb_len_t freetable_len(struct tle_freetable *ftable)
{
- return sizeof(struct free_zone_header)
- + sizeof(tdb_off_t) * (BUCKETS_FOR_ZONE(zone->zone_bits)+1);
+ return sizeof(struct tdb_freetable);
}
static void set_free_record(void *mem, tdb_len_t len)
/* We do all the work in add_to_freetable */
}
+/* When a record has padding (@extra != 0), store a single NUL byte right
+ * after the @len payload bytes that follow the header @u.  Records without
+ * padding are left untouched. */
+static void add_zero_pad(struct tdb_used_record *u, size_t len, size_t extra)
+{
+	char *payload = (char *)(u + 1);
+
+	if (extra == 0)
+		return;
+	payload[len] = '\0';
+}
+
+/* Fill in a DATA element at @mem: a used-record header (TDB_USED_MAGIC)
+ * followed immediately by the key bytes and then the data bytes.
+ * set_header() is given the key size, the data size, the total of
+ * key + data + extra, and the hash of the key. */
static void set_data_record(void *mem, struct tdb_context *tdb,
- struct tle_zone *last_zone,
struct tle_used *used)
{
struct tdb_used_record *u = mem;
- set_header(tdb, u, used->key.dsize, used->data.dsize,
+ set_header(tdb, u, TDB_USED_MAGIC, used->key.dsize, used->data.dsize,
used->key.dsize + used->data.dsize + used->extra,
- tdb_hash(tdb, used->key.dptr, used->key.dsize),
- last_zone->zone_bits);
+ tdb_hash(tdb, used->key.dptr, used->key.dsize));
memcpy(u + 1, used->key.dptr, used->key.dsize);
memcpy((char *)(u + 1) + used->key.dsize,
used->data.dptr, used->data.dsize);
+ /* If there is padding, its first byte (just past key + data) is zeroed. */
+ add_zero_pad(u, used->key.dsize + used->data.dsize, used->extra);
}
+/* Fill in a HASHTABLE element at @mem: a used-record header
+ * (TDB_HTABLE_MAGIC) followed by a zeroed table of
+ * (1 << TDB_SUBLEVEL_HASH_BITS) tdb_off_t slots.  set_header() is given a
+ * key length of 0, the table size, and the table size plus htable->extra. */
static void set_hashtable(void *mem, struct tdb_context *tdb,
- struct tle_zone *last_zone,
struct tle_hashtable *htable)
{
struct tdb_used_record *u = mem;
- tdb_len_t len = sizeof(tdb_off_t) << htable->hash_bits;
+ tdb_len_t len = sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS;
- set_header(tdb, u, 0, len, len + htable->extra, 0,
- last_zone->zone_bits);
+ set_header(tdb, u, TDB_HTABLE_MAGIC, 0, len, len + htable->extra, 0);
memset(u + 1, 0, len);
+ /* If there is padding, its first byte (just past the table) is zeroed. */
+ add_zero_pad(u, len, htable->extra);
}
-static void set_zone(void *mem, struct tdb_context *tdb,
- struct tle_zone *zone)
+/* Fill in a CAPABILITY element at @mem and link it onto the capability
+ * list.
+ * @hdr:      start of the image; offsets are resolved relative to it.
+ * @last_cap: offset of the previously written capability, or 0 if this is
+ *            the first one (in which case hdr->capabilities points here). */
+static void set_capability(void *mem, struct tdb_context *tdb,
+ struct tle_capability *cap, struct tdb_header *hdr,
+ tdb_off_t last_cap)
{
- struct free_zone_header *fz = mem;
- memset(fz, 0, zone_header_len(zone));
- fz->zone_bits = zone->zone_bits;
+ struct tdb_capability *c = mem;
+ /* Length after the used-record header: rest of the struct plus extra. */
+ tdb_len_t len = sizeof(*c) - sizeof(struct tdb_used_record) + cap->extra;
+
+ c->type = cap->type;
+ c->next = 0;
+ set_header(tdb, &c->hdr, TDB_CAP_MAGIC, 0, len, len, 0);
+
+ /* Append to capability list. */
+ if (!last_cap) {
+ hdr->capabilities = cap->base.off;
+ } else {
+ /* Re-point c at the previous capability and chain this one after it. */
+ c = (struct tdb_capability *)((char *)hdr + last_cap);
+ c->next = cap->base.off;
+ }
+}
+
+/* Fill in a FREETABLE element at @mem and link it onto the free-table list.
+ * The whole structure is zeroed, then given a TDB_FTABLE_MAGIC header whose
+ * lengths cover everything after that header.
+ * @hdr:         start of the image; offsets are resolved relative to it.
+ * @last_ftable: offset of the previously written free table, or 0 if this
+ *               is the first one (then hdr->free_table points here). */
+static void set_freetable(void *mem, struct tdb_context *tdb,
+			  struct tle_freetable *freetable, struct tdb_header *hdr,
+			  tdb_off_t last_ftable)
+{
+	struct tdb_freetable *table = mem;
+	struct tdb_freetable *prev;
+	const size_t body_len = sizeof(*table) - sizeof(table->hdr);
+
+	memset(table, 0, sizeof(*table));
+	set_header(tdb, &table->hdr, TDB_FTABLE_MAGIC, 0,
+		   body_len, body_len, 0);
+
+	if (!last_ftable) {
+		/* First free table: the file header points straight at it. */
+		hdr->free_table = freetable->base.off;
+		return;
+	}
+
+	/* Otherwise chain it after the previously written table. */
+	prev = (struct tdb_freetable *)((char *)hdr + last_ftable);
+	prev->next = freetable->base.off;
}
+/* Register the FREE element at offset @eoff (payload length @elen) in the
+ * given free table.  The tdb's current free table (tdb2.ftable_off and
+ * tdb2.ftable) is switched to @freetable / @ftable first, then
+ * add_free_record() is called with the record's full length, i.e. header
+ * plus @elen. */
static void add_to_freetable(struct tdb_context *tdb,
- struct tle_zone *last_zone,
tdb_off_t eoff,
- tdb_off_t elen)
+ tdb_off_t elen,
+ unsigned ftable,
+ struct tle_freetable *freetable)
+{
+ tdb->tdb2.ftable_off = freetable->base.off;
+ tdb->tdb2.ftable = ftable;
+ add_free_record(tdb, eoff, sizeof(struct tdb_used_record) + elen,
+ TDB_LOCK_WAIT, false);
+}
+
+/* Offset of bucket @ingroup inside the hash group that begins at
+ * @group_start.  @ingroup is reduced modulo the group size
+ * (1 << TDB_HASH_GROUP_BITS), so it wraps around within the group. */
+static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned ingroup)
+{
+	unsigned slot = ingroup % (1 << TDB_HASH_GROUP_BITS);
+
+	return group_start + slot * sizeof(tdb_off_t);
+}
+
+/* Get bits from a value: return @num bits of @val starting at bit @start
+ * (bit 0 is the least significant).
+ * @num may be anything from 0 to 32 inclusive; @start must be below 64.
+ * The mask is built in 64 bits: with a 32-bit "1U", num == 32 would shift
+ * by the full width of the type, which is undefined behaviour. */
+static uint32_t bits(uint64_t val, unsigned start, unsigned num)
+{
+	assert(num <= 32);
+	return (val >> start) & ((1ULL << num) - 1);
+}
+
+/* We take bits from the top: that way we can lock whole sections of the hash
+ * by using lock ranges.
+ * @used counts how many top bits of @h have been consumed so far; it is
+ * advanced by @num and the newly consumed @num bits are returned. */
+static uint32_t use_bits(uint64_t h, unsigned num, unsigned *used)
{
- add_free_record(tdb, last_zone->zone_bits, eoff,
- sizeof(struct tdb_used_record) + elen);
+ *used += num;
+ return bits(h, 64 - *used, num);
}
+/* Build the value stored in a hash bucket: @new_off OR'ed with @bucket and
+ * with the top TDB_OFF_UPPER_STEAL_EXTRA bits of hash @h, the latter shifted
+ * up to TDB_OFF_HASH_EXTRA_BIT. */
+static tdb_off_t encode_offset(tdb_off_t new_off, unsigned bucket,
+			       uint64_t h)
+{
+	uint64_t hash_extra = bits(h, 64 - TDB_OFF_UPPER_STEAL_EXTRA,
+				   TDB_OFF_UPPER_STEAL_EXTRA);
+	tdb_off_t encoded = new_off;
+
+	encoded |= bucket;
+	encoded |= hash_extra << TDB_OFF_HASH_EXTRA_BIT;
+	return encoded;
+}
+
+/* FIXME: Our hash table handling here is primitive: we don't expand! */
+/* Insert the record at offset @eoff, keyed by @key, into the top-level hash
+ * table embedded in the header.  The key's hash picks a group and a home
+ * bucket within it; buckets of that group are then probed, starting at the
+ * home bucket and wrapping around, until an empty (zero) slot is found.
+ * Aborts if every bucket of the group is occupied. */
static void add_to_hashtable(struct tdb_context *tdb,
tdb_off_t eoff,
struct tdb_data key)
{
- uint64_t hash = tdb_hash(tdb, key.dptr, key.dsize);
- tdb_off_t hoff;
+ uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
+ tdb_off_t b_off, group_start;
+ unsigned i, group, in_group;
+ unsigned used = 0;
+
+ /* Top bits select the group, the following bits the home bucket. */
+ group = use_bits(h, TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS, &used);
+ in_group = use_bits(h, TDB_HASH_GROUP_BITS, &used);
+
+ group_start = offsetof(struct tdb_header, hashtable)
+ + group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
+
+ for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
+ unsigned bucket = (in_group + i) % (1 << TDB_HASH_GROUP_BITS);
+
+ b_off = hbucket_off(group_start, bucket);
+ if (tdb_read_off(tdb, b_off) == 0) {
+ /* The encoded value carries the home bucket (in_group), not the
+  * slot actually used. */
+ tdb_write_off(tdb, b_off,
+ encode_offset(eoff, in_group, h));
+ return;
+ }
+ }
+ abort();
+}
- while (tdb_read_off(tdb, hoff = hash_off(tdb, hash)) != 0)
- hash++;
+/* Return the @num'th FREETABLE element of @layout, counting from 0 in the
+ * order the elements appear.  Aborts if the layout has fewer than @num + 1
+ * free tables. */
+static struct tle_freetable *find_ftable(struct tdb_layout *layout, unsigned num)
+{
+ unsigned i;
- tdb_write_off(tdb, hoff, eoff);
+ for (i = 0; i < layout->num_elems; i++) {
+ if (layout->elem[i].base.type != FREETABLE)
+ continue;
+ if (num == 0)
+ return &layout->elem[i].ftable;
+ num--;
+ }
+ abort();
}
/* FIXME: Support TDB_CONVERT */
-struct tdb_context *tdb_layout_get(struct tdb_layout *layout)
+struct tdb_context *tdb_layout_get(struct tdb_layout *layout,
+ void (*freefn)(void *),
+ union tdb_attribute *attr)
{
unsigned int i;
- tdb_off_t off, len;
- tdb_len_t zone_left;
- struct tdb_header *hdr;
+ tdb_off_t off, len, last_ftable, last_cap;
char *mem;
struct tdb_context *tdb;
- struct tle_zone *last_zone = NULL;
-
- assert(layout->htable != -1U);
- assert(layout->elem[0].base.type == ZONE);
- zone_left = 0;
off = sizeof(struct tdb_header);
/* First pass of layout: calc lengths */
union tdb_layout_elem *e = &layout->elem[i];
e->base.off = off;
switch (e->base.type) {
- case ZONE:
- assert(zone_left == 0);
- len = zone_header_len(&e->zone);
- zone_left = 1ULL << e->zone.zone_bits;
+ case FREETABLE:
+ len = freetable_len(&e->ftable);
break;
case FREE:
- if (e->free.len == 0)
- e->free.len = zone_left
- - sizeof(struct tdb_used_record);
len = free_record_len(e->free.len);
break;
case DATA:
case HASHTABLE:
len = hashtable_len(&e->hashtable);
break;
+ case CAPABILITY:
+ len = capability_len(&e->capability);
+ break;
+ default:
+ abort();
}
off += len;
- assert(zone_left >= len);
- zone_left -= len;
}
- /* Fill final zone with free record. */
- if (zone_left != 0) {
- tdb_layout_add_free(layout,
- zone_left
- - sizeof(struct tdb_used_record));
- layout->elem[layout->num_elems-1].base.off = off;
- off += zone_left;
- }
-
- mem = malloc(off+1);
+ mem = malloc(off);
+ /* Fill with some weird pattern. */
+ memset(mem, 0x99, off);
/* Now populate our header, cribbing from a real TDB header. */
- tdb = tdb_open(NULL, TDB_INTERNAL, O_RDWR, 0, &tap_log_attr);
- hdr = (void *)mem;
- *hdr = tdb->header;
- hdr->v.generation++;
- hdr->v.hash_bits = layout->elem[layout->htable].hashtable.hash_bits;
- hdr->v.hash_off = layout->elem[layout->htable].base.off
- + sizeof(struct tdb_used_record);
+ tdb = tdb_open(NULL, TDB_INTERNAL, O_RDWR, 0, attr);
+ memcpy(mem, tdb->file->map_ptr, sizeof(struct tdb_header));
/* Mug the tdb we have to make it use this. */
- free(tdb->map_ptr);
- tdb->map_ptr = mem;
- tdb->map_size = off+1;
- header_changed(tdb);
+ freefn(tdb->file->map_ptr);
+ tdb->file->map_ptr = mem;
+ tdb->file->map_size = off;
+ last_ftable = 0;
+ last_cap = 0;
for (i = 0; i < layout->num_elems; i++) {
union tdb_layout_elem *e = &layout->elem[i];
switch (e->base.type) {
- case ZONE:
- set_zone(mem + e->base.off, tdb, &e->zone);
- last_zone = &e->zone;
+ case FREETABLE:
+ set_freetable(mem + e->base.off, tdb, &e->ftable,
+ (struct tdb_header *)mem, last_ftable);
+ last_ftable = e->base.off;
break;
case FREE:
set_free_record(mem + e->base.off, e->free.len);
break;
case DATA:
- set_data_record(mem + e->base.off, tdb, last_zone,
- &e->used);
+ set_data_record(mem + e->base.off, tdb, &e->used);
break;
case HASHTABLE:
- set_hashtable(mem + e->base.off, tdb, last_zone,
- &e->hashtable);
+ set_hashtable(mem + e->base.off, tdb, &e->hashtable);
+ break;
+ case CAPABILITY:
+ set_capability(mem + e->base.off, tdb, &e->capability,
+ (struct tdb_header *)mem, last_cap);
+ last_cap = e->base.off;
break;
}
}
+ /* Must have a free table! */
+ assert(last_ftable);
/* Now fill the free and hash tables. */
for (i = 0; i < layout->num_elems; i++) {
union tdb_layout_elem *e = &layout->elem[i];
switch (e->base.type) {
- case ZONE:
- last_zone = &e->zone;
- break;
case FREE:
- add_to_freetable(tdb, last_zone,
- e->base.off, e->free.len);
+ add_to_freetable(tdb, e->base.off, e->free.len,
+ e->free.ftable_num,
+ find_ftable(layout, e->free.ftable_num));
break;
case DATA:
add_to_hashtable(tdb, e->base.off, e->used.key);
}
}
- /* Write tailer. */
- ((uint8_t *)tdb->map_ptr)[tdb->map_size-1] = last_zone->zone_bits;
+ tdb->tdb2.ftable_off = find_ftable(layout, 0)->base.off;
return tdb;
}
+
+/* Build the database image described by @layout (via tdb_layout_get()) and
+ * write it to @filename, which is created or truncated with mode 0600.
+ * @freefn and @attr are passed straight through to tdb_layout_get().
+ * Any failure to open, write or close the file terminates the program via
+ * err()/errx() with exit status 1.  The tdb is closed before returning. */
+void tdb_layout_write(struct tdb_layout *layout, void (*freefn)(void *),
+		      union tdb_attribute *attr, const char *filename)
+{
+	struct tdb_context *tdb = tdb_layout_get(layout, freefn, attr);
+	const char *buf = tdb->file->map_ptr;
+	size_t remaining = tdb->file->map_size;
+	int fd;
+
+	fd = open(filename, O_WRONLY|O_TRUNC|O_CREAT, 0600);
+	if (fd < 0)
+		err(1, "opening %s for writing", filename);
+
+	/* write() may transfer fewer bytes than requested without that being
+	 * an error (and without setting errno), so keep going until the whole
+	 * image is out rather than reporting a stale errno. */
+	while (remaining > 0) {
+		ssize_t done = write(fd, buf, remaining);
+
+		if (done < 0)
+			err(1, "writing %s", filename);
+		if (done == 0)
+			errx(1, "writing %s: no progress", filename);
+		buf += done;
+		remaining -= (size_t)done;
+	}
+
+	/* Deferred write errors can surface at close() time. */
+	if (close(fd) != 0)
+		err(1, "closing %s", filename);
+	tdb_close(tdb);
+}
+
+/* Release a layout: free the key and data buffers of every DATA element,
+ * then the element array, then the layout structure itself. */
+void tdb_layout_free(struct tdb_layout *layout)
+{
+	unsigned int idx = 0;
+
+	while (idx < layout->num_elems) {
+		union tdb_layout_elem *e = &layout->elem[idx++];
+
+		/* Only DATA elements have key/data buffers to release. */
+		if (e->base.type != DATA)
+			continue;
+		free(e->used.key.dptr);
+		free(e->used.data.dptr);
+	}
+	free(layout->elem);
+	free(layout);
+}