< tdb->map_size) {
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"check: %llu zones size %llu don't cover %llu\n",
- (long long)(1ULL << tdb->header.v.zone_bits),
(long long)tdb->header.v.num_zones,
+ (long long)(1ULL << tdb->header.v.zone_bits),
(long long)tdb->map_size);
return false;
}
{
tdb_off_t i, off;
- do {
- for (i = 0; i < tdb->header.v.num_zones; i++) {
- /* Try getting one from list. */
- off = lock_and_alloc(tdb, tdb->header.v.free_buckets,
- size, actual);
- if (off == TDB_OFF_ERR)
- return TDB_OFF_ERR;
- if (off != 0)
- return off;
- /* FIXME: Coalesce! */
- }
- } while (tdb_expand(tdb, 0, size, false) == 0);
-
- return TDB_OFF_ERR;
+ for (i = 0; i < tdb->header.v.num_zones; i++) {
+ /* Try getting one from list. */
+ off = lock_and_alloc(tdb, tdb->header.v.free_buckets,
+ size, actual);
+ if (off == TDB_OFF_ERR)
+ return TDB_OFF_ERR;
+ if (off != 0)
+ return off;
+ /* FIXME: Coalesce! */
+ }
+ return 0;
}
static tdb_off_t get_free(struct tdb_context *tdb, size_t size,
/* Increase the zone size. */
new_num_zones = tdb->header.v.num_zones;
new_zone_bits = tdb->header.v.zone_bits+1;
- while ((new_num_zones << new_zone_bits) - tdb->map_size
- < needed) {
+ while ((new_num_zones << new_zone_bits)
+ < tdb->map_size + needed) {
new_zone_bits++;
}
- /* We expand by enough zones to meet the need. */
- add = (needed + (1ULL << new_zone_bits)-1)
- & ~((1ULL << new_zone_bits)-1);
+ /* We expand by enough full zones to meet the need. */
+ add = ((tdb->map_size + needed + (1ULL << new_zone_bits)-1)
+ & ~((1ULL << new_zone_bits)-1))
+ - tdb->map_size;
}
/* Updates tdb->map_size. */
old_num_total = tdb->header.v.num_zones*(tdb->header.v.free_buckets+1);
old_free_off = tdb->header.v.free_off;
oldf = tdb_access_read(tdb, old_free_off,
- old_num_total * sizeof(tdb_off_t));
+ old_num_total * sizeof(tdb_off_t), true);
if (!oldf)
goto fail;
/* Switch to using our new zone. */
- if (zero_out(tdb, off, new_num_zones * (new_num_buckets + 1)) == -1)
+ if (zero_out(tdb, off, freebucket_size) == -1)
goto fail_release;
+
tdb->header.v.free_off = off;
tdb->header.v.num_zones = new_num_zones;
+ tdb->header.v.zone_bits = new_zone_bits;
tdb->header.v.free_buckets = new_num_buckets;
/* FIXME: If zone size hasn't changed, can simply copy pointers. */
if (tdb_read_convert(tdb, old_free_off, &fhdr, sizeof(fhdr)) == -1)
goto fail_release;
if (add_free_record(tdb, old_free_off,
- rec_data_length(&fhdr)+rec_extra_padding(&fhdr)))
+ sizeof(fhdr)
+ + rec_data_length(&fhdr)
+ + rec_extra_padding(&fhdr)))
goto fail_release;
/* Add the rest as a new free record. */
if (ret)
return ret;
}
-
- if (unlikely(tdb_oob(tdb, off + len, false) == -1))
- return NULL;
-
- if (tdb->methods->read(tdb, off, pad, len) == -1)
- return NULL;
- return tdb_convert(tdb, pad, len);
+ return tdb_read_convert(tdb, off, pad, len) == -1 ? NULL : pad;
}
/* Endian conversion: we only ever deal with 8 byte quantities */
void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
{
- if (unlikely((tdb->flags & TDB_CONVERT))) {
+ if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
uint64_t i, *p = (uint64_t *)buf;
for (i = 0; i < size / 8; i++)
p[i] = bswap_64(p[i]);
uint64_t tdb_find_nonzero_off(struct tdb_context *tdb, tdb_off_t off,
uint64_t num)
{
- uint64_t i, *val;
- bool alloc = false;
-
- val = tdb_direct(tdb, off, num * sizeof(tdb_off_t));
- if (!unlikely(val)) {
- val = tdb_alloc_read(tdb, off, num * sizeof(tdb_off_t));
- if (!val)
- return num;
- alloc = true;
- }
+ uint64_t i; /* index of the first non-zero entry; ends up == num if there is none */
+ const uint64_t *val; /* read-only view of the num entries starting at off */
+
+ /* Zero vs non-zero is the same unconverted: minor optimization. */
+ val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
+ if (!val)
+ return num; /* read failed: caller sees the same value as "nothing non-zero found" */
for (i = 0; i < num; i++) {
if (val[i])
break;
}
- if (unlikely(alloc))
- free(val);
+ tdb_access_release(tdb, val); /* hand back whatever tdb_access_read gave us */
return i;
}
uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
uint64_t num)
{
- uint64_t i, *val;
- bool alloc = false;
-
- val = tdb_direct(tdb, off, num * sizeof(tdb_off_t));
- if (!unlikely(val)) {
- val = tdb_alloc_read(tdb, off, num * sizeof(tdb_off_t));
- if (!val)
- return num;
- alloc = true;
- }
+ uint64_t i; /* index of the first zero entry; ends up == num if there is none */
+ const uint64_t *val; /* read-only view of the num entries starting at off */
+
+ /* Zero vs non-zero is the same unconverted: minor optimization. */
+ val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
+ if (!val)
+ return num; /* read failed: caller sees the same value as "no zero entry found" */
for (i = 0; i < num; i++) {
if (!val[i])
break;
}
- if (unlikely(alloc))
- free(val);
+ tdb_access_release(tdb, val); /* hand back whatever tdb_access_read gave us */
return i;
}
-static int fill(struct tdb_context *tdb,
- const void *buf, size_t size,
- tdb_off_t off, tdb_len_t len)
-{
- while (len) {
- size_t n = len > size ? size : len;
-
- if (!tdb_pwrite_all(tdb->fd, buf, n, off)) {
- tdb->ecode = TDB_ERR_IO;
- tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
- "fill write failed: giving up!\n");
- return -1;
- }
- len -= n;
- off += n;
- }
- return 0;
-}
-
int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
{
+ char buf[8192] = { 0 }; /* zero source for the chunked fallback. NOTE(review): now initialized even when the direct path below returns early */
void *p = tdb_direct(tdb, off, len);
if (p) {
memset(p, 0, len);
return 0;
- } else {
- char buf[8192] = { 0 };
- return fill(tdb, buf, sizeof(buf), off, len);
}
+ while (len) { /* no direct pointer: write zeros through the methods table, at most sizeof(buf) per call */
+ unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
+ if (tdb->methods->write(tdb, off, buf, todo) == -1)
+ return -1; /* stop at the first failed write; earlier chunks stay written */
+ len -= todo;
+ off += todo;
+ }
+ return 0; /* whole range zeroed */
}
tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
}
int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
- void *rec, size_t len)
+ const void *rec, size_t len)
{
- return tdb->methods->write(tdb, off, tdb_convert(tdb, rec, len), len);
+ int ret; /* result of the underlying write, or -1 on allocation failure */
+ if (unlikely((tdb->flags & TDB_CONVERT))) {
+ void *conv = malloc(len); /* scratch copy, so the caller's rec is never modified. NOTE(review): malloc(0) may return NULL and would be logged as OOM -- confirm len is never 0 */
+ if (!conv) {
+ tdb->ecode = TDB_ERR_OOM;
+ tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
+ "tdb_write: no memory converting %zu bytes\n",
+ len);
+ return -1;
+ }
+ memcpy(conv, rec, len);
+ ret = tdb->methods->write(tdb, off,
+ tdb_convert(tdb, conv, len), len); /* convert the copy in place, then write it */
+ free(conv); /* scratch copy is released whether or not the write succeeded */
+ } else
+ ret = tdb->methods->write(tdb, off, rec, len); /* TDB_CONVERT clear: write rec as given */
+
+ return ret;
}
int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off)
{
struct tdb_used_record pad, *r;
- void *key;
+ const void *key;
uint64_t klen, hash;
r = tdb_get(tdb, off, &pad, sizeof(pad));
return 0;
klen = rec_key_length(r);
- key = tdb_direct(tdb, off + sizeof(pad), klen);
- if (likely(key))
- return tdb_hash(tdb, key, klen);
-
- key = tdb_alloc_read(tdb, off + sizeof(pad), klen);
- if (unlikely(!key))
+ key = tdb_access_read(tdb, off + sizeof(pad), klen, false);
+ if (!key)
return 0;
+
hash = tdb_hash(tdb, key, klen);
- free(key);
+ tdb_access_release(tdb, key);
return hash;
}
-/* Give a piece of tdb data to a parser */
-int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
- tdb_off_t offset, tdb_len_t len,
- int (*parser)(TDB_DATA key, TDB_DATA data,
- void *private_data),
- void *private_data)
+static int fill(struct tdb_context *tdb,
+ const void *buf, size_t size,
+ tdb_off_t off, tdb_len_t len)
{
- TDB_DATA data;
- int result;
- bool allocated = false;
-
- data.dsize = len;
- data.dptr = tdb_direct(tdb, offset, len);
- if (unlikely(!data.dptr)) {
- if (!(data.dptr = tdb_alloc_read(tdb, offset, len))) {
+ while (len) {
+ size_t n = len > size ? size : len;
+
+ if (!tdb_pwrite_all(tdb->fd, buf, n, off)) {
+ tdb->ecode = TDB_ERR_IO;
+ tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
+ "fill write failed: giving up!\n");
return -1;
}
- allocated = true;
+ len -= n;
+ off += n;
}
- result = parser(key, data, private_data);
- if (unlikely(allocated))
- free(data.dptr);
- return result;
+ return 0;
}
/* expand a file. we prefer to use ftruncate, as that is what posix
}
const void *tdb_access_read(struct tdb_context *tdb,
- tdb_off_t off, tdb_len_t len)
+ tdb_off_t off, tdb_len_t len, bool convert)
{
- const void *ret = tdb_direct(tdb, off, len);
+ const void *ret = NULL; /* stays NULL unless the direct lookup below is attempted and succeeds */
- if (!ret)
+ if (likely(!(tdb->flags & TDB_CONVERT))) /* a direct pointer is only tried when TDB_CONVERT is clear */
+ ret = tdb_direct(tdb, off, len);
+
+ if (!ret) { /* no direct pointer: fall back to an allocated copy */
ret = tdb_alloc_read(tdb, off, len);
+ if (convert) /* caller asked for the copy to be passed through tdb_convert */
+ tdb_convert(tdb, (void *)ret, len); /* ret is NULL here if the read failed; tdb_convert only swaps when buf is non-NULL */
+ }
return ret;
}
void tdb_munmap(struct tdb_context *tdb);
void tdb_mmap(struct tdb_context *tdb);
-/* Hand data to a function, direct if possible */
-int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
- tdb_off_t offset, tdb_len_t len,
- int (*parser)(TDB_DATA key, TDB_DATA data,
- void *private_data),
- void *private_data);
-
/* Either make a copy into pad and return that, or return ptr into mmap.
* Converts endian (ie. will use pad in that case). */
void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len);
/* Either alloc a copy, or give direct access. Release frees or noop. */
const void *tdb_access_read(struct tdb_context *tdb,
- tdb_off_t off, tdb_len_t len);
+ tdb_off_t off, tdb_len_t len, bool convert);
void tdb_access_release(struct tdb_context *tdb, const void *p);
/* Convenience routine to get an offset. */
/* Allocate and make a copy of some offset. */
void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len);
-/* Munges record and writes it */
+/* Writes a converted copy of a record. */
int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
- void *rec, size_t len);
+ const void *rec, size_t len);
/* Reads record and converts it */
int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
if (keylen != key->dsize)
return TDB_OFF_ERR;
- rkey = tdb_access_read(tdb, off + sizeof(*rec), keylen);
+ rkey = tdb_access_read(tdb, off + sizeof(*rec), keylen, false);
if (!rkey)
return TDB_OFF_ERR;
if (memcmp(rkey, key->dptr, keylen) != 0)
}
/* FIXME: Optimize? */
-static void unlock_range(struct tdb_context *tdb,
+static void unlock_lists(struct tdb_context *tdb,
tdb_off_t list, tdb_len_t num,
int ltype)
{
}
/* FIXME: Optimize? */
-static int lock_range(struct tdb_context *tdb,
+static int lock_lists(struct tdb_context *tdb,
tdb_off_t list, tdb_len_t num,
int ltype)
{
for (i = list; i < list + num; i++) {
if (tdb_lock_list(tdb, i, ltype, TDB_LOCK_WAIT) != 0) {
- unlock_range(tdb, list, i - list, ltype);
+ unlock_lists(tdb, list, i - list, ltype);
return -1;
}
}
if (tdb_lock_list(tdb, 0, ltype, TDB_LOCK_WAIT))
return TDB_OFF_ERR;
len = tdb_find_zero_off(tdb, hash_off(tdb, 0), num);
- if (lock_range(tdb, 1, len, ltype) == -1) {
+ if (lock_lists(tdb, 1, len, ltype) == -1) {
tdb_unlock_list(tdb, 0, ltype);
return TDB_OFF_ERR;
}
start++;
pre_locks = 0;
}
- if (unlikely(lock_range(tdb, start, len, ltype) == -1)) {
+ if (unlikely(lock_lists(tdb, start, len, ltype) == -1)) {
if (pre_locks)
- unlock_range(tdb, 0, pre_locks, ltype);
+ unlock_lists(tdb, 0, pre_locks, ltype);
else
tdb_unlock_list(tdb, start, ltype);
return TDB_OFF_ERR;
/* Now, did we lose the race, and it's not zero any more? */
if (unlikely(tdb_read_off(tdb, hash_off(tdb, pre_locks + len)) != 0)) {
- unlock_range(tdb, 0, pre_locks, ltype);
+ unlock_lists(tdb, 0, pre_locks, ltype);
/* Leave the start locked, as expected. */
- unlock_range(tdb, start + 1, len - 1, ltype);
+ unlock_lists(tdb, start + 1, len - 1, ltype);
goto again;
}
/* If we fail, others will try after us. */
static void enlarge_hash(struct tdb_context *tdb)
{
- tdb_off_t newoff, i;
+ tdb_off_t newoff, oldoff, i;
+ tdb_len_t hlen;
uint64_t h, num = 1ULL << tdb->header.v.hash_bits;
struct tdb_used_record pad, *r;
if ((1ULL << tdb->header.v.hash_bits) != num)
goto unlock;
- newoff = alloc(tdb, 0, num * 2, 0, false);
+ /* Allocate our new array. */
+ hlen = num * sizeof(tdb_off_t) * 2;
+ newoff = alloc(tdb, 0, hlen, 0, false);
if (unlikely(newoff == TDB_OFF_ERR))
goto unlock;
if (unlikely(newoff == 0)) {
- if (tdb_expand(tdb, 0, num * 2, false) == -1)
+ if (tdb_expand(tdb, 0, hlen, false) == -1)
goto unlock;
- newoff = alloc(tdb, 0, num * 2, 0, false);
+ newoff = alloc(tdb, 0, hlen, 0, false);
if (newoff == TDB_OFF_ERR || newoff == 0)
goto unlock;
}
+ /* Step over record header! */
+ newoff += sizeof(struct tdb_used_record);
+
+ /* Starts all zero. */
+ if (zero_out(tdb, newoff, hlen) == -1)
+ goto unlock;
/* FIXME: If the space before is empty, we know this is in its ideal
- * location. We can steal a bit from the pointer to avoid rehash. */
- for (i = tdb_find_nonzero_off(tdb, tdb->header.v.hash_off, num);
+ * location. Or steal a bit from the pointer to avoid rehash. */
+ for (i = tdb_find_nonzero_off(tdb, hash_off(tdb, 0), num);
i < num;
- i += tdb_find_nonzero_off(tdb, tdb->header.v.hash_off
- + i*sizeof(tdb_off_t), num - i)) {
+ i += tdb_find_nonzero_off(tdb, hash_off(tdb, i), num - i)) {
tdb_off_t off;
- off = tdb_read_off(tdb, tdb->header.v.hash_off
- + i*sizeof(tdb_off_t));
+ off = tdb_read_off(tdb, hash_off(tdb, i));
if (unlikely(off == TDB_OFF_ERR))
goto unlock;
if (unlikely(!off)) {
}
/* Free up old hash. */
- r = tdb_get(tdb, tdb->header.v.hash_off, &pad, sizeof(*r));
+ oldoff = tdb->header.v.hash_off - sizeof(*r);
+ r = tdb_get(tdb, oldoff, &pad, sizeof(*r));
if (!r)
goto unlock;
- add_free_record(tdb, tdb->header.v.hash_off,
- rec_data_length(r) + rec_extra_padding(r));
+ add_free_record(tdb, oldoff,
+ sizeof(*r)+rec_data_length(r)+rec_extra_padding(r));
/* Now we write the modified header. */
- tdb->header.v.generation++;
tdb->header.v.hash_bits++;
tdb->header.v.hash_off = newoff;
- tdb_write_convert(tdb, offsetof(struct tdb_header, v),
- &tdb->header.v, sizeof(tdb->header.v));
+ write_header(tdb);
unlock:
tdb_allrecord_unlock(tdb, F_WRLCK);
}
/* Allocate a new record. */
new_off = alloc(tdb, key.dsize, dbuf.dsize, h, growing);
if (new_off == 0) {
- unlock_range(tdb, start, num_locks, F_WRLCK);
+ unlock_lists(tdb, start, num_locks, F_WRLCK);
/* Expand, then try again... */
if (tdb_expand(tdb, key.dsize, dbuf.dsize, growing) == -1)
return -1;
goto fail;
/* FIXME: tdb_increment_seqnum(tdb); */
- unlock_range(tdb, start, num_locks, F_WRLCK);
+ unlock_lists(tdb, start, num_locks, F_WRLCK);
/* FIXME: by simple simulation, this approximated 60% full.
* Check in real case! */
return 0;
fail:
- unlock_range(tdb, start, num_locks, F_WRLCK);
+ unlock_lists(tdb, start, num_locks, F_WRLCK);
return -1;
}
}
if (!off) {
- unlock_range(tdb, start, num_locks, F_RDLCK);
+ unlock_lists(tdb, start, num_locks, F_RDLCK);
tdb->ecode = TDB_ERR_NOEXIST;
return tdb_null;
}
ret.dsize = rec_data_length(&rec);
ret.dptr = tdb_alloc_read(tdb, off + sizeof(rec) + key.dsize,
ret.dsize);
- unlock_range(tdb, start, num_locks, F_RDLCK);
+ unlock_lists(tdb, start, num_locks, F_RDLCK);
return ret;
}
}
if (!off) {
- unlock_range(tdb, start, num_locks, F_WRLCK);
+ unlock_lists(tdb, start, num_locks, F_WRLCK);
tdb->ecode = TDB_ERR_NOEXIST;
return -1;
}
+ rec_extra_padding(&rec)) != 0)
goto unlock_err;
- unlock_range(tdb, start, num_locks, F_WRLCK);
+ unlock_lists(tdb, start, num_locks, F_WRLCK);
return 0;
unlock_err:
- unlock_range(tdb, start, num_locks, F_WRLCK);
+ unlock_lists(tdb, start, num_locks, F_WRLCK);
return -1;
}
--- /dev/null
+#include <ccan/tdb2/tdb.c>
+#include <ccan/tdb2/free.c>
+#include <ccan/tdb2/lock.c>
+#include <ccan/tdb2/io.c>
+#include <ccan/tdb2/check.c>
+#include <ccan/tap/tap.h>
+#include "logging.h"
+
+/*
+ * Test driver: for each open-flag combination, create a fresh database,
+ * call enlarge_hash() on it once and verify that tdb_check() still passes.
+ * The final test asserts that nothing was logged along the way.
+ */
+int main(int argc, char *argv[])
+{
+	unsigned int i;
+	struct tdb_context *tdb;
+	int flags[] = { TDB_INTERNAL, TDB_DEFAULT,
+			TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT };
+
+	/* Two tests per flag set (open, check) plus the final log check. */
+	plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1);
+	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+		tdb = tdb_open("/tmp/run-new_database.tdb", flags[i],
+			       O_RDWR|O_CREAT|O_TRUNC, 0600, NULL);
+		ok1(tdb);
+		if (tdb) {
+			/* Install the logger only once we know the open
+			 * succeeded: assigning through a NULL tdb would
+			 * crash the test instead of reporting a failure. */
+			tdb->log = tap_log_fn;
+			enlarge_hash(tdb);
+			ok1(tdb_check(tdb, NULL, NULL) == 0);
+			tdb_close(tdb);
+		}
+	}
+	ok1(tap_log_messages == 0);
+	return exit_status();
+}
int flags[] = { TDB_INTERNAL, TDB_DEFAULT,
TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT };
- plan_tests(sizeof(flags) / sizeof(flags[0]) * 3 + 1);
+ plan_tests(sizeof(flags) / sizeof(flags[0]) * 10 + 1);
for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
tdb = tdb_open("/tmp/run-new_database.tdb", flags[i],
O_RDWR|O_CREAT|O_TRUNC, 0600, NULL);
tdb->log = tap_log_fn;
ok1(tdb);
if (tdb) {
+ /* First expand (expand file to fill zone). */
ok1(tdb_expand(tdb, 1, 1, false) == 0);
+ ok1(tdb->header.v.num_zones == 1);
+ ok1(tdb_check(tdb, NULL, NULL) == 0);
+ /* Little expand (extra zone). */
+ ok1(tdb_expand(tdb, 1, 1, false) == 0);
+ ok1(tdb->header.v.num_zones == 2);
+ ok1(tdb_check(tdb, NULL, NULL) == 0);
+ /* Big expand (enlarge zones) */
+ ok1(tdb_expand(tdb, 1, 4096, false) == 0);
+ ok1(tdb->header.v.num_zones == 2);
ok1(tdb_check(tdb, NULL, NULL) == 0);
tdb_close(tdb);
}