return off >> tdb->header.v.zone_bits;
}
-/* Returns fl->max_bucket + 1, or list number to search. */
+/* Returns free_buckets + 1, or list number to search. */
static tdb_off_t find_free_head(struct tdb_context *tdb, tdb_off_t bucket)
{
tdb_off_t first, off;
/* Speculatively search for a non-zero bucket. */
first = tdb->last_zone * (tdb->header.v.free_buckets+1) + bucket;
off = tdb_find_nonzero_off(tdb, free_list_off(tdb, first),
- tdb->header.v.free_buckets - bucket);
+ tdb->header.v.free_buckets + 1 - bucket);
return bucket + off;
}
tdb->last_zone = zone_of(tdb, off);
list = tdb->last_zone * (tdb->header.v.free_buckets+1)
+ size_to_bucket(tdb, new.data_len);
-
+
if (tdb_lock_free_list(tdb, list, TDB_LOCK_WAIT) != 0)
return -1;
break;
}
- if (remove_from_list(tdb, list, r) == -1) {
+ if (remove_from_list(tdb, nlist, r) == -1) {
tdb_unlock_free_list(tdb, nlist);
goto err;
}
if (!r)
goto err;
+ if (r->data_len != data_len) {
+ tdb->ecode = TDB_ERR_CORRUPT;
+		tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
+			 "coalesce: expected data len %llu not %llu\n",
+			 (unsigned long long)data_len, (unsigned long long)r->data_len);
+ goto err;
+ }
+
if (remove_from_list(tdb, list, r) == -1)
goto err;
tdb_len_t *actual)
{
tdb_off_t list;
- tdb_off_t off, prev, best_off;
+ tdb_off_t off, best_off;
struct tdb_free_record pad, best = { 0 }, *r;
double multiplier;
return TDB_OFF_ERR;
}
- prev = free_list_off(tdb, list);
- off = tdb_read_off(tdb, prev);
-
- if (unlikely(off == TDB_OFF_ERR))
- goto unlock_err;
-
best.data_len = -1ULL;
best_off = 0;
multiplier = 1.0;
/* Walk the list to see if any are large enough, getting less fussy
* as we go. */
- while (off) {
- prev = off;
- off = tdb_read_off(tdb, prev);
- if (unlikely(off == TDB_OFF_ERR))
- goto unlock_err;
+ off = tdb_read_off(tdb, free_list_off(tdb, list));
+ if (unlikely(off == TDB_OFF_ERR))
+ goto unlock_err;
+ while (off) {
r = tdb_get(tdb, off, &pad, sizeof(*r));
if (!r)
goto unlock_err;
+
if (r->magic != TDB_FREE_MAGIC) {
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"lock_and_alloc: %llu non-free 0x%llx\n",
best = *r;
}
- if (best.data_len < size * multiplier && best_off) {
- /* We're happy with this size: take it. */
- if (remove_from_list(tdb, list, &best) != 0)
- goto unlock_err;
- tdb_unlock_free_list(tdb, list);
+ if (best.data_len < size * multiplier && best_off)
+ goto use_best;
- if (to_used_record(tdb, best_off, size, best.data_len,
- actual)) {
- return -1;
- }
- return best_off;
- }
multiplier *= 1.01;
/* Since we're going slow anyway, try coalescing here. */
/* This has unlocked list, restart. */
goto again;
}
+ off = r->next;
+ }
+
+ /* If we found anything at all, use it. */
+ if (best_off) {
+ use_best:
+ /* We're happy with this size: take it. */
+ if (remove_from_list(tdb, list, &best) != 0)
+ goto unlock_err;
+ tdb_unlock_free_list(tdb, list);
+
+ if (to_used_record(tdb, best_off, size, best.data_len,
+ actual)) {
+ return -1;
+ }
+ return best_off;
}
tdb_unlock_free_list(tdb, list);
{
tdb_off_t i, off;
- do {
- for (i = 0; i < tdb->header.v.num_zones; i++) {
- /* Try getting one from list. */
- off = lock_and_alloc(tdb, tdb->header.v.free_buckets,
- size, actual);
- if (off == TDB_OFF_ERR)
- return TDB_OFF_ERR;
- if (off != 0)
- return off;
- /* FIXME: Coalesce! */
- }
- } while (tdb_expand(tdb, 0, size, false) == 0);
-
- return TDB_OFF_ERR;
+ for (i = 0; i < tdb->header.v.num_zones; i++) {
+ /* Try getting one from list. */
+ off = lock_and_alloc(tdb, tdb->header.v.free_buckets,
+ size, actual);
+ if (off == TDB_OFF_ERR)
+ return TDB_OFF_ERR;
+ if (off != 0)
+ return off;
+ /* FIXME: Coalesce! */
+ }
+ return 0;
}
static tdb_off_t get_free(struct tdb_context *tdb, size_t size,
tdb_off_t b;
/* Start at exact size bucket, and search up... */
- for (b = bucket; b <= tdb->header.v.num_zones; b++) {
+ for (b = bucket; b <= tdb->header.v.free_buckets; b++) {
b = find_free_head(tdb, b);
/* Non-empty list? Try getting block. */
- if (b <= tdb->header.v.num_zones) {
+ if (b <= tdb->header.v.free_buckets) {
/* Try getting one from list. */
off = lock_and_alloc(tdb, b, size, actual);
if (off == TDB_OFF_ERR)
return true;
}
-/* Expand the database. */
-int tdb_expand(struct tdb_context *tdb, tdb_len_t klen, tdb_len_t dlen,
- bool growing)
+/* Returns how much extra room we get, or TDB_OFF_ERR. */
+static tdb_len_t expand_to_fill_zones(struct tdb_context *tdb)
{
- uint64_t new_num_buckets, new_num_zones, new_zone_bits;
- uint64_t i, old_num_total, old_num_zones, old_size, old_zone_bits;
- tdb_len_t add, freebucket_size, needed;
- tdb_off_t off, old_free_off;
- const tdb_off_t *oldf;
- struct tdb_used_record fhdr;
-
- /* We need room for the record header too. */
- needed = sizeof(struct tdb_used_record)
- + adjust_size(klen, dlen, growing);
-
- /* tdb_allrecord_lock will update header; did zones change? */
- old_zone_bits = tdb->header.v.zone_bits;
- old_num_zones = tdb->header.v.num_zones;
-
- /* FIXME: this is overkill. An expand lock? */
- if (tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false) == -1)
- return -1;
-
- /* Someone may have expanded for us. */
- if (old_zone_bits != tdb->header.v.zone_bits
- || old_num_zones != tdb->header.v.num_zones)
- goto success;
-
- /* They may have also expanded the underlying size (otherwise we'd
- * have expanded our mmap to look at those offsets already). */
- old_size = tdb->map_size;
- tdb->methods->oob(tdb, tdb->map_size + 1, true);
- if (tdb->map_size != old_size)
- goto success;
-
- /* Did we enlarge zones without enlarging file? */
- if (tdb->map_size < tdb->header.v.num_zones<<tdb->header.v.zone_bits) {
- add = (tdb->header.v.num_zones<<tdb->header.v.zone_bits)
- - tdb->map_size;
- /* Updates tdb->map_size. */
- if (tdb->methods->expand_file(tdb, add) == -1)
- goto fail;
- if (add_free_record(tdb, tdb->map_size - add, add) == -1)
- goto fail;
- if (add >= needed) {
- /* Allocate from this zone. */
- tdb->last_zone = zone_of(tdb, tdb->map_size - add);
- goto success;
- }
- }
-
- /* Slow path. Should we increase the number of buckets? */
- new_num_buckets = tdb->header.v.free_buckets;
- if (larger_buckets_might_help(tdb))
- new_num_buckets++;
+ tdb_len_t add;
- /* Now we'll need room for the new free buckets, too. Assume
- * worst case (zones expand). */
- needed += sizeof(fhdr)
- + ((tdb->header.v.num_zones+1)
- * (new_num_buckets+1) * sizeof(tdb_off_t));
+ /* We can enlarge zones without enlarging file to match. */
+ add = (tdb->header.v.num_zones<<tdb->header.v.zone_bits)
+ - tdb->map_size;
+ if (add <= sizeof(struct tdb_free_record))
+ return 0;
- /* If we need less that one zone, and they're working well, just add
- * another one. */
- if (needed < (1UL<<tdb->header.v.zone_bits) && zones_happy(tdb)) {
- new_num_zones = tdb->header.v.num_zones+1;
- new_zone_bits = tdb->header.v.zone_bits;
- add = 1ULL << tdb->header.v.zone_bits;
- } else {
- /* Increase the zone size. */
- new_num_zones = tdb->header.v.num_zones;
- new_zone_bits = tdb->header.v.zone_bits+1;
- while ((new_num_zones << new_zone_bits) - tdb->map_size
- < needed) {
- new_zone_bits++;
- }
+ /* Updates tdb->map_size. */
+ if (tdb->methods->expand_file(tdb, add) == -1)
+ return TDB_OFF_ERR;
+ if (add_free_record(tdb, tdb->map_size - add, add) == -1)
+ return TDB_OFF_ERR;
+ return add;
+}
- /* We expand by enough zones to meet the need. */
- add = (needed + (1ULL << new_zone_bits)-1)
- & ~((1ULL << new_zone_bits)-1);
- }
+static int update_zones(struct tdb_context *tdb,
+ uint64_t new_num_zones,
+ uint64_t new_zone_bits,
+ uint64_t new_num_buckets,
+ tdb_len_t add)
+{
+ tdb_len_t freebucket_size;
+ const tdb_off_t *oldf;
+ tdb_off_t i, off, old_num_total, old_free_off;
+ struct tdb_used_record fhdr;
/* Updates tdb->map_size. */
if (tdb->methods->expand_file(tdb, add) == -1)
- goto fail;
+ return -1;
/* Use first part as new free bucket array. */
off = tdb->map_size - add;
/* Write header. */
if (set_header(tdb, &fhdr, 0, freebucket_size, freebucket_size, 0))
- goto fail;
+ return -1;
if (tdb_write_convert(tdb, off, &fhdr, sizeof(fhdr)) == -1)
- goto fail;
+ return -1;
/* Adjust off to point to start of buckets, add to be remainder. */
add -= freebucket_size + sizeof(fhdr);
old_num_total = tdb->header.v.num_zones*(tdb->header.v.free_buckets+1);
old_free_off = tdb->header.v.free_off;
oldf = tdb_access_read(tdb, old_free_off,
- old_num_total * sizeof(tdb_off_t));
+ old_num_total * sizeof(tdb_off_t), true);
if (!oldf)
- goto fail;
+ return -1;
/* Switch to using our new zone. */
- if (zero_out(tdb, off, new_num_zones * (new_num_buckets + 1)) == -1)
+ if (zero_out(tdb, off, freebucket_size) == -1)
goto fail_release;
+
tdb->header.v.free_off = off;
tdb->header.v.num_zones = new_num_zones;
+ tdb->header.v.zone_bits = new_zone_bits;
tdb->header.v.free_buckets = new_num_buckets;
/* FIXME: If zone size hasn't changed, can simply copy pointers. */
}
}
-
/* Free up the old free buckets. */
old_free_off -= sizeof(fhdr);
if (tdb_read_convert(tdb, old_free_off, &fhdr, sizeof(fhdr)) == -1)
goto fail_release;
if (add_free_record(tdb, old_free_off,
- rec_data_length(&fhdr)+rec_extra_padding(&fhdr)))
+ sizeof(fhdr)
+ + rec_data_length(&fhdr)
+ + rec_extra_padding(&fhdr)))
goto fail_release;
/* Add the rest as a new free record. */
/* Start allocating from where the new space is. */
tdb->last_zone = zone_of(tdb, tdb->map_size - add);
tdb_access_release(tdb, oldf);
+ return write_header(tdb);
+
+fail_release:
+ tdb_access_release(tdb, oldf);
+ return -1;
+}
+
+/* Expand the database. */
+int tdb_expand(struct tdb_context *tdb, tdb_len_t klen, tdb_len_t dlen,
+ bool growing)
+{
+ uint64_t new_num_buckets, new_num_zones, new_zone_bits;
+ uint64_t old_num_zones, old_size, old_zone_bits;
+ tdb_len_t add, needed;
+
+ /* We need room for the record header too. */
+ needed = sizeof(struct tdb_used_record)
+ + adjust_size(klen, dlen, growing);
+
+ /* tdb_allrecord_lock will update header; did zones change? */
+ old_zone_bits = tdb->header.v.zone_bits;
+ old_num_zones = tdb->header.v.num_zones;
+
+ /* FIXME: this is overkill. An expand lock? */
+ if (tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false) == -1)
+ return -1;
+
+ /* Someone may have expanded for us. */
+ if (old_zone_bits != tdb->header.v.zone_bits
+ || old_num_zones != tdb->header.v.num_zones)
+ goto success;
+
+ /* They may have also expanded the underlying size (otherwise we'd
+ * have expanded our mmap to look at those offsets already). */
+ old_size = tdb->map_size;
+ tdb->methods->oob(tdb, tdb->map_size + 1, true);
+ if (tdb->map_size != old_size)
+ goto success;
+
+ add = expand_to_fill_zones(tdb);
+ if (add == TDB_OFF_ERR)
+ goto fail;
+
+ if (add >= needed) {
+ /* Allocate from this zone. */
+ tdb->last_zone = zone_of(tdb, tdb->map_size - add);
+ goto success;
+ }
+
+ /* Slow path. Should we increase the number of buckets? */
+ new_num_buckets = tdb->header.v.free_buckets;
+ if (larger_buckets_might_help(tdb))
+ new_num_buckets++;
+
+ /* Now we'll need room for the new free buckets, too. Assume
+ * worst case (zones expand). */
+ needed += sizeof(struct tdb_used_record)
+ + ((tdb->header.v.num_zones+1)
+ * (new_num_buckets+1) * sizeof(tdb_off_t));
+
+	/* If we need less than one zone, and they're working well, just add
+	 * another one. */
+ if (needed < (1UL<<tdb->header.v.zone_bits) && zones_happy(tdb)) {
+ new_num_zones = tdb->header.v.num_zones+1;
+ new_zone_bits = tdb->header.v.zone_bits;
+ add = 1ULL << tdb->header.v.zone_bits;
+ } else {
+ /* Increase the zone size. */
+ new_num_zones = tdb->header.v.num_zones;
+ new_zone_bits = tdb->header.v.zone_bits+1;
+ while ((new_num_zones << new_zone_bits)
+ < tdb->map_size + needed) {
+ new_zone_bits++;
+ }
+
+ /* We expand by enough full zones to meet the need. */
+ add = ((tdb->map_size + needed + (1ULL << new_zone_bits)-1)
+ & ~((1ULL << new_zone_bits)-1))
+ - tdb->map_size;
+ }
+
+ if (update_zones(tdb, new_num_zones, new_zone_bits, new_num_buckets,
+ add) == -1)
+ goto fail;
+
success:
tdb_allrecord_unlock(tdb, F_WRLCK);
return 0;
-fail_release:
- tdb_access_release(tdb, oldf);
fail:
tdb_allrecord_unlock(tdb, F_WRLCK);
return -1;