From: Rusty Russell Date: Wed, 17 Nov 2010 09:55:41 +0000 (+1030) Subject: tdb2: get rid of zones X-Git-Url: https://git.ozlabs.org/?p=ccan;a=commitdiff_plain;h=d70577b6aff24ccf6815896509dabb8c9ac07904 tdb2: get rid of zones Zones were a bad idea. They mean we can't simply add stuff to the end of the file (which transactions relied upon), and there's no good heuristic in how to size them. This patch reverts us to a single free table. --- diff --git a/ccan/tdb2/check.c b/ccan/tdb2/check.c index 5a16dbf4..bd7679c9 100644 --- a/ccan/tdb2/check.c +++ b/ccan/tdb2/check.c @@ -237,7 +237,7 @@ static bool check_hash_tree(struct tdb_context *tdb, goto fail; /* Bottom bits must match header. */ - if ((h & ((1 << 5)-1)) != rec_hash(&rec)) { + if ((h & ((1 << 11)-1)) != rec_hash(&rec)) { tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, "tdb_check: Bad hash magic at" " offset %llu (0x%llx vs 0x%llx)\n", @@ -267,7 +267,8 @@ static bool check_hash(struct tdb_context *tdb, 0, 0, used, num_used, &num_found)) return false; - if (num_found != num_used) { + /* 1 is for the free list. */ + if (num_found != num_used - 1) { tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, "tdb_check: Not all entries are in hash\n"); return false; @@ -278,8 +279,7 @@ static bool check_hash(struct tdb_context *tdb, static bool check_free(struct tdb_context *tdb, tdb_off_t off, const struct tdb_free_record *frec, - tdb_off_t prev, - tdb_off_t zone_off, unsigned int bucket) + tdb_off_t prev, unsigned int bucket) { if (frec_magic(frec) != TDB_FREE_MAGIC) { tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, @@ -291,20 +291,11 @@ static bool check_free(struct tdb_context *tdb, + frec->data_len+sizeof(struct tdb_used_record), false)) return false; - if (off < zone_off || off >= zone_off + (1ULL<log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, - "tdb_check: offset %llu outside zone %llu-%llu\n", - (long long)off, - (long long)zone_off, - (long long)zone_off + (1ULL<data_len) != bucket) { + if (size_to_bucket(frec->data_len) != bucket) { tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, "tdb_check: offset %llu in wrong bucket %u vs %u\n", (long long)off, - bucket, - size_to_bucket(frec_zone_bits(frec), frec->data_len)); + bucket, size_to_bucket(frec->data_len)); return false; } if (prev != frec->prev) { @@ -317,31 +308,41 @@ static bool check_free(struct tdb_context *tdb, return true; } -static tdb_len_t check_free_list(struct tdb_context *tdb, - tdb_off_t zone_off, - tdb_off_t free[], - size_t num_free, - size_t *num_found) +static bool check_free_list(struct tdb_context *tdb, + tdb_off_t flist_off, + tdb_off_t free[], + size_t num_free, + size_t *num_found) { - struct free_zone_header zhdr; + struct tdb_freelist flist; tdb_off_t h; unsigned int i; - if (tdb_read_convert(tdb, zone_off, &zhdr, sizeof(zhdr)) == -1) - return TDB_OFF_ERR; + if (tdb_read_convert(tdb, flist_off, &flist, sizeof(flist)) == -1) + return false; + + if (rec_magic(&flist.hdr) != TDB_MAGIC + || rec_key_length(&flist.hdr) != 0 + || rec_data_length(&flist.hdr) != sizeof(flist) - sizeof(flist.hdr) + || rec_hash(&flist.hdr) != 1) { + tdb->log(tdb, TDB_DEBUG_ERROR, + tdb->log_priv, + "tdb_check: Invalid header on free list\n"); + return false; + } - for (i = 0; i <= BUCKETS_FOR_ZONE(zhdr.zone_bits); i++) { + for (i = 0; i < TDB_FREE_BUCKETS; i++) { tdb_off_t off, prev = 0, *p; struct tdb_free_record f; - h = bucket_off(zone_off, i); + h = bucket_off(flist_off, i); for (off = tdb_read_off(tdb, h); off; off = f.next) { if (off == TDB_OFF_ERR) - return TDB_OFF_ERR; + return false; if (tdb_read_convert(tdb, off, &f, sizeof(f))) - return TDB_OFF_ERR; - if (!check_free(tdb, off, &f, prev, zone_off, i)) - return TDB_OFF_ERR; + return false; + if (!check_free(tdb, off, &f, prev, i)) + return false; /* FIXME: Check hash bits */ p = asearch(&off, free, num_free, off_cmp); @@ -351,7 +352,7 @@ static tdb_len_t check_free_list(struct tdb_context *tdb, "tdb_check: Invalid offset" " %llu in free table\n", (long long)off); - return TDB_OFF_ERR; + return false; } /* Mark it invalid. */ *p ^= 1; @@ -359,79 +360,38 @@ static tdb_len_t check_free_list(struct tdb_context *tdb, prev = off; } } - return 1ULL << zhdr.zone_bits; + return true; } -static tdb_off_t check_zone(struct tdb_context *tdb, tdb_off_t zone_off, - tdb_off_t **used, size_t *num_used, - tdb_off_t **free, size_t *num_free, - unsigned int *max_zone_bits) +static bool check_linear(struct tdb_context *tdb, + tdb_off_t **used, size_t *num_used, + tdb_off_t **free, size_t *num_free) { - struct free_zone_header zhdr; - tdb_off_t off, hdrlen, end; + tdb_off_t off; tdb_len_t len; - if (tdb_read_convert(tdb, zone_off, &zhdr, sizeof(zhdr)) == -1) - return TDB_OFF_ERR; - - if (zhdr.zone_bits < INITIAL_ZONE_BITS) { - tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, - "check: bad zone_bits %llu at zone %llu\n", - (long long)zhdr.zone_bits, (long long)zone_off); - return TDB_OFF_ERR; - } - - /* Zone bits can only increase... */ - if (zhdr.zone_bits > *max_zone_bits) - *max_zone_bits = zhdr.zone_bits; - else if (zhdr.zone_bits < *max_zone_bits) { - tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, - "check: small zone_bits %llu at zone %llu\n", - (long long)zhdr.zone_bits, (long long)zone_off); - return TDB_OFF_ERR; - } - - /* Zone header must be within file! */ - hdrlen = sizeof(zhdr) - + (BUCKETS_FOR_ZONE(zhdr.zone_bits) + 1) * sizeof(tdb_off_t); - - if (tdb->methods->oob(tdb, zone_off + hdrlen, true)) - return TDB_OFF_ERR; - - end = zone_off + (1ULL << zhdr.zone_bits); - if (end > tdb->map_size) - end = tdb->map_size; - - for (off = zone_off + hdrlen; off < end; off += len) { + for (off = sizeof(struct tdb_header); off < tdb->map_size; off += len) { union { struct tdb_used_record u; struct tdb_free_record f; } pad, *p; p = tdb_get(tdb, off, &pad, sizeof(pad)); if (!p) - return TDB_OFF_ERR; + return false; if (frec_magic(&p->f) == TDB_FREE_MAGIC || frec_magic(&p->f) == TDB_COALESCING_MAGIC) { - if (frec_zone_bits(&p->f) != zhdr.zone_bits) { - tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, - "tdb_check: Bad free zone bits %u" - " at offset %llu\n", - frec_zone_bits(&p->f), - (long long)off); - return TDB_OFF_ERR; - } len = sizeof(p->u) + p->f.data_len; - if (off + len > zone_off + (1ULL << zhdr.zone_bits)) { + if (off + len > tdb->map_size) { tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, "tdb_check: free overlength %llu" " at offset %llu\n", (long long)len, (long long)off); - return TDB_OFF_ERR; + return false; } /* This record is free! */ if (frec_magic(&p->f) == TDB_FREE_MAGIC && !append(free, num_free, off)) - return TDB_OFF_ERR; + return false; } else { uint64_t klen, dlen, extra; @@ -442,32 +402,23 @@ static tdb_off_t check_zone(struct tdb_context *tdb, tdb_off_t zone_off, " at offset %llu\n", (long long)rec_magic(&p->u), (long long)off); - return TDB_OFF_ERR; + return false; } - if (rec_zone_bits(&p->u) != zhdr.zone_bits) { - tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, - "tdb_check: Bad zone bits %u" - " at offset %llu\n", - rec_zone_bits(&p->u), - (long long)off); - return TDB_OFF_ERR; - } - if (!append(used, num_used, off)) - return TDB_OFF_ERR; + return false; klen = rec_key_length(&p->u); dlen = rec_data_length(&p->u); extra = rec_extra_padding(&p->u); len = sizeof(p->u) + klen + dlen + extra; - if (off + len > zone_off + (1ULL << zhdr.zone_bits)) { + if (off + len > tdb->map_size) { tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, "tdb_check: used overlength %llu" " at offset %llu\n", (long long)len, (long long)off); - return TDB_OFF_ERR; + return false; } if (len < sizeof(p->f)) { @@ -475,11 +426,11 @@ static tdb_off_t check_zone(struct tdb_context *tdb, tdb_off_t zone_off, "tdb_check: too short record %llu at" " %llu\n", (long long)len, (long long)off); - return TDB_OFF_ERR; + return false; } } } - return off - zone_off; + return true; } /* FIXME: call check() function. */ @@ -487,10 +438,8 @@ int tdb_check(struct tdb_context *tdb, int (*check)(TDB_DATA key, TDB_DATA data, void *private_data), void *private_data) { - tdb_off_t *free = NULL, *used = NULL, off; - tdb_len_t len; + tdb_off_t *free = NULL, *used = NULL; size_t num_free = 0, num_used = 0, num_found = 0; - unsigned max_zone_bits = INITIAL_ZONE_BITS; if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false) != 0) return -1; @@ -504,26 +453,16 @@ int tdb_check(struct tdb_context *tdb, goto fail; /* First we do a linear scan, checking all records. */ - for (off = sizeof(struct tdb_header); - off < tdb->map_size; - off += len) { - len = check_zone(tdb, off, &used, &num_used, &free, &num_free, - &max_zone_bits); - if (len == TDB_OFF_ERR) - goto fail; - } + if (!check_linear(tdb, &used, &num_used, &free, &num_free)) + goto fail; /* FIXME: Check key uniqueness? */ if (!check_hash(tdb, used, num_used)) goto fail; - for (off = sizeof(struct tdb_header); - off < tdb->map_size - 1; - off += len) { - len = check_free_list(tdb, off, free, num_free, &num_found); - if (len == TDB_OFF_ERR) - goto fail; - } + if (!check_free_list(tdb, tdb->flist_off, free, num_free, &num_found)) + goto fail; + if (num_found != num_free) { tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, "tdb_check: Not all entries are in free table\n"); diff --git a/ccan/tdb2/free.c b/ccan/tdb2/free.c index f5107fad..3fc8bef6 100644 --- a/ccan/tdb2/free.c +++ b/ccan/tdb2/free.c @@ -27,46 +27,8 @@ static unsigned fls64(uint64_t val) return ilog64(val); } -static unsigned ffs64(uint64_t val) -{ -#if HAVE_BUILTIN_FFSLL - return __builtin_ffsll(val); -#else - unsigned r = 0; - - if (!val) - return 0; - - if (!(val & 0xffffffff)) { - val >>= 32; - r += 32; - } - if (!(val & 0xffff)) { - val >>= 16; - r += 16; - } - if (!(val & 0xff)) { - val >>= 8; - r += 8; - } - if (!(val & 0xf)) { - val >>= 4; - r += 4; - } - if (!(val & 0x3)) { - val >>= 2; - r += 2; - } - if (!(val & 0x1)) { - val >>= 1; - r += 1; - } - return r; -#endif -} - /* In which bucket would we find a particular record size? (ignoring header) */ -unsigned int size_to_bucket(unsigned int zone_bits, tdb_len_t data_len) +unsigned int size_to_bucket(tdb_len_t data_len) { unsigned int bucket; @@ -82,72 +44,24 @@ unsigned int size_to_bucket(unsigned int zone_bits, tdb_len_t data_len) bucket = fls64(data_len - TDB_MIN_DATA_LEN) + 2; } - if (unlikely(bucket > BUCKETS_FOR_ZONE(zone_bits))) - bucket = BUCKETS_FOR_ZONE(zone_bits); + if (unlikely(bucket >= TDB_FREE_BUCKETS)) + bucket = TDB_FREE_BUCKETS - 1; return bucket; } -/* Binary search for the zone for this offset. */ -static tdb_off_t off_to_zone(struct tdb_context *tdb, tdb_off_t off, - struct free_zone_header *zhdr) -{ - tdb_off_t start, end; - - start = sizeof(struct tdb_header); - end = start + (1ULL << fls64(tdb->map_size - start)); - - for (;;) { - if (tdb_read_convert(tdb, start, zhdr, sizeof(*zhdr)) == -1) - return TDB_OFF_ERR; - - /* Is it inside this zone? */ - if (off < start + (1ULL << zhdr->zone_bits)) - return start; - - /* In practice, start + end won't overflow. */ - if (off >= (start + end) / 2) - start = (start + end) / 2; - else - end = (start + end) / 2; - } -} - -static tdb_off_t last_zone(struct tdb_context *tdb, - struct free_zone_header *zhdr) -{ - return off_to_zone(tdb, tdb->map_size - 1, zhdr); -} - -int tdb_zone_init(struct tdb_context *tdb) +int tdb_flist_init(struct tdb_context *tdb) { - unsigned int i; - uint64_t randoff = 0; - - /* We start in a random zone, to spread the load. */ - for (i = 0; i < 64; i += fls64(RAND_MAX)) - randoff ^= ((uint64_t)random()) << i; - randoff = sizeof(struct tdb_header) - + (randoff % (tdb->map_size - sizeof(struct tdb_header))); - - tdb->zone_off = off_to_zone(tdb, randoff, &tdb->zhdr); - if (tdb->zone_off == TDB_OFF_ERR) + tdb->flist_off = tdb_read_off(tdb, + offsetof(struct tdb_header, free_list)); + if (tdb->flist_off == TDB_OFF_ERR) return -1; return 0; } -/* Where's the header, given a zone size of 1 << zone_bits? */ -static tdb_off_t zone_off(tdb_off_t off, unsigned int zone_bits) -{ - off -= sizeof(struct tdb_header); - return (off & ~((1ULL << zone_bits) - 1)) + sizeof(struct tdb_header); -} - /* Offset of a given bucket. */ -/* FIXME: bucket can be "unsigned" everywhere, or even uint8/16. */ -tdb_off_t bucket_off(tdb_off_t zone_off, tdb_off_t bucket) +tdb_off_t bucket_off(tdb_off_t flist_off, unsigned bucket) { - return zone_off - + sizeof(struct free_zone_header) + return flist_off + offsetof(struct tdb_freelist, buckets) + bucket * sizeof(tdb_off_t); } @@ -155,9 +69,8 @@ tdb_off_t bucket_off(tdb_off_t zone_off, tdb_off_t bucket) static tdb_off_t find_free_head(struct tdb_context *tdb, tdb_off_t bucket) { /* Speculatively search for a non-zero bucket. */ - return tdb_find_nonzero_off(tdb, bucket_off(tdb->zone_off, 0), - bucket, - BUCKETS_FOR_ZONE(tdb->zhdr.zone_bits) + 1); + return tdb_find_nonzero_off(tdb, bucket_off(tdb->flist_off, 0), + bucket, TDB_FREE_BUCKETS); } /* Remove from free bucket. */ @@ -247,7 +160,6 @@ static int enqueue_in_free(struct tdb_context *tdb, /* List need not be locked. */ int add_free_record(struct tdb_context *tdb, - unsigned int zone_bits, tdb_off_t off, tdb_len_t len_with_header) { struct tdb_free_record new; @@ -255,13 +167,11 @@ int add_free_record(struct tdb_context *tdb, int ret; assert(len_with_header >= sizeof(new)); - assert(zone_bits < 64); - new.magic_and_meta = TDB_FREE_MAGIC | zone_bits; + new.magic_and_meta = TDB_FREE_MAGIC; new.data_len = len_with_header - sizeof(struct tdb_used_record); - b_off = bucket_off(zone_off(off, zone_bits), - size_to_bucket(zone_bits, new.data_len)); + b_off = bucket_off(tdb->flist_off, size_to_bucket(new.data_len)); if (tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) != 0) return -1; @@ -299,19 +209,14 @@ static size_t record_leftover(size_t keylen, size_t datalen, /* Note: we unlock the current bucket if we coalesce or fail. */ static int coalesce(struct tdb_context *tdb, - tdb_off_t zone_off, unsigned zone_bits, tdb_off_t off, tdb_off_t b_off, tdb_len_t data_len) { struct tdb_free_record pad, *r; - tdb_off_t zone_end, end; + tdb_off_t end; end = off + sizeof(struct tdb_used_record) + data_len; - zone_end = zone_off + (1ULL << zone_bits); - - if (tdb->methods->oob(tdb, zone_end, true)) - zone_end = tdb->map_size; - while (end < zone_end) { + while (end < tdb->map_size) { tdb_off_t nb_off; /* FIXME: do tdb_get here and below really win? */ @@ -322,8 +227,8 @@ static int coalesce(struct tdb_context *tdb, if (frec_magic(r) != TDB_FREE_MAGIC) break; - nb_off = bucket_off(zone_off, - size_to_bucket(zone_bits, r->data_len)); + /* FIXME: Use flist from record */ + nb_off = bucket_off(tdb->flist_off,size_to_bucket(r->data_len)); /* We may be violating lock order here, so best effort. */ if (tdb_lock_free_bucket(tdb, nb_off, TDB_LOCK_NOWAIT) == -1) @@ -341,8 +246,8 @@ static int coalesce(struct tdb_context *tdb, break; } - if (unlikely(bucket_off(zone_off, - size_to_bucket(zone_bits, r->data_len)) + if (unlikely(bucket_off(tdb->flist_off, + size_to_bucket(r->data_len)) != nb_off)) { tdb_unlock_free_bucket(tdb, nb_off); break; @@ -383,14 +288,14 @@ static int coalesce(struct tdb_context *tdb, /* We have to drop this to avoid deadlocks, so make sure record * doesn't get coalesced by someone else! */ - r->magic_and_meta = TDB_COALESCING_MAGIC | zone_bits; + r->magic_and_meta = TDB_COALESCING_MAGIC; r->data_len = end - off - sizeof(struct tdb_used_record); if (tdb_access_commit(tdb, r) != 0) goto err; tdb_unlock_free_bucket(tdb, b_off); - if (add_free_record(tdb, zone_bits, off, end - off) == -1) + if (add_free_record(tdb, off, end - off) == -1) return -1; return 1; @@ -402,8 +307,7 @@ err: /* We need size bytes to put our key and data in. */ static tdb_off_t lock_and_alloc(struct tdb_context *tdb, - tdb_off_t zone_off, - unsigned zone_bits, + tdb_off_t flist_off, tdb_off_t bucket, size_t keylen, size_t datalen, bool want_extra, @@ -415,11 +319,9 @@ static tdb_off_t lock_and_alloc(struct tdb_context *tdb, size_t size = adjust_size(keylen, datalen); again: - b_off = bucket_off(zone_off, bucket); + b_off = bucket_off(flist_off, bucket); - /* FIXME: Try non-blocking wait first, to measure contention. - * If we're contented, try switching zones, and don't enlarge zone - * next time (we want more zones). */ + /* FIXME: Try non-blocking wait first, to measure contention. */ /* Lock this bucket. */ if (tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == -1) { return TDB_OFF_ERR; @@ -464,8 +366,7 @@ again: multiplier *= 1.01; /* Since we're going slow anyway, try coalescing here. */ - switch (coalesce(tdb, zone_off, zone_bits, off, b_off, - r->data_len)) { + switch (coalesce(tdb, off, b_off, r->data_len)) { case -1: /* This has already unlocked on error. */ return -1; @@ -493,7 +394,7 @@ again: * coalesce() could try to merge it! */ if (set_header(tdb, &rec, keylen, datalen, best.data_len - leftover, - hashlow, zone_bits) != 0) + hashlow) != 0) goto unlock_err; if (tdb_write_convert(tdb, best_off, &rec, sizeof(rec)) != 0) @@ -502,7 +403,7 @@ again: tdb_unlock_free_bucket(tdb, b_off); if (leftover) { - if (add_free_record(tdb, zone_bits, + if (add_free_record(tdb, best_off + sizeof(rec) + best.data_len - leftover, leftover)) @@ -519,69 +420,34 @@ unlock_err: return TDB_OFF_ERR; } -static bool next_zone(struct tdb_context *tdb) -{ - tdb_off_t next = tdb->zone_off + (1ULL << tdb->zhdr.zone_bits); - - /* We must have a header. */ - if (tdb->methods->oob(tdb, next + sizeof(tdb->zhdr), true)) - return false; - - tdb->zone_off = next; - return tdb_read_convert(tdb, next, &tdb->zhdr, sizeof(tdb->zhdr)) == 0; -} - -/* Offset returned is within current zone (which it may alter). */ +/* Get a free block from current free list, or 0 if none. */ static tdb_off_t get_free(struct tdb_context *tdb, size_t keylen, size_t datalen, bool want_extra, unsigned hashlow) { - tdb_off_t start_zone = tdb->zone_off, off; - bool wrapped = false; - size_t size = adjust_size(keylen, datalen); + tdb_off_t off; + unsigned start_b, b; /* If they are growing, add 50% to get to higher bucket. */ if (want_extra) - size += datalen / 2; - - /* FIXME: If we don't get a hit in the first bucket we want, - * try changing zones for next time. That should help wear - * zones evenly, so we don't need to search all of them before - * expanding. */ - while (!wrapped || tdb->zone_off != start_zone) { - tdb_off_t b; - - /* Shortcut for really huge allocations... */ - if ((size >> tdb->zhdr.zone_bits) != 0) - goto next; - - /* Start at exact size bucket, and search up... */ - b = size_to_bucket(tdb->zhdr.zone_bits, size); - for (b = find_free_head(tdb, b); - b <= BUCKETS_FOR_ZONE(tdb->zhdr.zone_bits); - b = find_free_head(tdb, b + 1)) { - /* Try getting one from list. */ - off = lock_and_alloc(tdb, tdb->zone_off, - tdb->zhdr.zone_bits, - b, keylen, datalen, want_extra, - hashlow); - if (off == TDB_OFF_ERR) - return TDB_OFF_ERR; - if (off != 0) - return off; - /* Didn't work. Try next bucket. */ - } - - next: - /* Didn't work, try next zone, if it exists. */ - if (!next_zone(tdb)) { - wrapped = true; - tdb->zone_off = sizeof(struct tdb_header); - if (tdb_read_convert(tdb, tdb->zone_off, - &tdb->zhdr, sizeof(tdb->zhdr))) { - return TDB_OFF_ERR; - } - } + start_b = size_to_bucket(adjust_size(keylen, + datalen + datalen / 2)); + else + start_b = size_to_bucket(adjust_size(keylen, datalen)); + + /* Start at exact size bucket, and search up... */ + for (b = find_free_head(tdb, start_b); + b < TDB_FREE_BUCKETS; + b = find_free_head(tdb, b + 1)) { + /* Try getting one from list. */ + off = lock_and_alloc(tdb, tdb->flist_off, + b, keylen, datalen, want_extra, + hashlow); + if (off == TDB_OFF_ERR) + return TDB_OFF_ERR; + if (off != 0) + return off; + /* Didn't work. Try next bucket. */ } return 0; } @@ -589,15 +455,12 @@ static tdb_off_t get_free(struct tdb_context *tdb, int set_header(struct tdb_context *tdb, struct tdb_used_record *rec, uint64_t keylen, uint64_t datalen, - uint64_t actuallen, unsigned hashlow, - unsigned int zone_bits) + uint64_t actuallen, unsigned hashlow) { uint64_t keybits = (fls64(keylen) + 1) / 2; /* Use bottom bits of hash, so it's independent of hash table size. */ - rec->magic_and_meta - = zone_bits - | ((hashlow & ((1 << 5)-1)) << 6) + rec->magic_and_meta = (hashlow & ((1 << 11)-1)) | ((actuallen - (keylen + datalen)) << 11) | (keybits << 43) | (TDB_MAGIC << 48); @@ -617,26 +480,11 @@ int set_header(struct tdb_context *tdb, return 0; } -static bool zones_contended(struct tdb_context *tdb) -{ - return false; -} - -/* Assume we want buckets up to the comfort factor. */ -static tdb_len_t overhead(unsigned int zone_bits) -{ - return sizeof(struct free_zone_header) - + (BUCKETS_FOR_ZONE(zone_bits) + 1) * sizeof(tdb_off_t); -} - -/* Expand the database (by adding a zone). */ +/* Expand the database. */ static int tdb_expand(struct tdb_context *tdb, tdb_len_t size) { uint64_t old_size; - tdb_off_t off; - unsigned int num_buckets, zone_bits; - tdb_len_t wanted, expand; - struct free_zone_header zhdr; + tdb_len_t wanted; /* We need room for the record header too. */ wanted = sizeof(struct tdb_used_record) + size; @@ -648,91 +496,20 @@ static int tdb_expand(struct tdb_context *tdb, tdb_len_t size) /* Someone else may have expanded the file, so retry. */ old_size = tdb->map_size; tdb->methods->oob(tdb, tdb->map_size + 1, true); - if (tdb->map_size != old_size) - goto success; - - /* Treat last zone as minimum reasonable zone size. */ - off = last_zone(tdb, &zhdr); - if (off == TDB_OFF_ERR) - goto fail; - - /* Zone isn't fully expanded? */ - if (tdb->map_size < off + (1ULL << zhdr.zone_bits)) { - expand = off + (1ULL << zhdr.zone_bits) - tdb->map_size; - /* Expand more than we want. */ - if (expand > (wanted << TDB_COMFORT_FACTOR_BITS)) - expand = (wanted << TDB_COMFORT_FACTOR_BITS); - if (tdb->methods->expand_file(tdb, expand) == -1) - goto fail; - /* We need to drop this lock before adding free record. */ + if (tdb->map_size != old_size) { tdb_unlock_expand(tdb, F_WRLCK); - - /* Allocate from here. */ - tdb->zone_off = off; - tdb->zhdr = zhdr; - - /* FIXME: If this isn't sufficient, we search again... */ - return add_free_record(tdb, zhdr.zone_bits, - tdb->map_size - expand, expand); + return 0; } - /* We are never allowed to cross a power-of-two boundary, and our - * minimum zone size is 1 << INITIAL_ZONE_BITS. - * - * If our filesize is 128k, we can add a 64k or a 128k zone. If it's - * 192k, we can only add a 64k zone. - * - * In other words, our max zone size is (1 << (ffs(filesize) - 1)) */ - zone_bits = ffs64(old_size - sizeof(struct tdb_header)) - 1; - assert(zone_bits >= INITIAL_ZONE_BITS); - - /* Big zones generally good, but more zones wanted if contended. */ - if (zones_contended(tdb)) { - /* If it suffices, make zone same size as last one. */ - if (zhdr.zone_bits < zone_bits - && (1ULL << zhdr.zone_bits) >= overhead(zone_bits)+wanted) - zone_bits = zhdr.zone_bits; + if (tdb->methods->expand_file(tdb, wanted*TDB_EXTENSION_FACTOR) == -1) { + tdb_unlock_expand(tdb, F_WRLCK); + return -1; } - zhdr.zone_bits = zone_bits; - num_buckets = BUCKETS_FOR_ZONE(zone_bits); - - /* Expand the file by more than we need right now. */ - expand = 1ULL << zone_bits; - if (expand > overhead(zone_bits) + (wanted << TDB_COMFORT_FACTOR_BITS)) - expand = overhead(zone_bits) - + (wanted << TDB_COMFORT_FACTOR_BITS); - - if (tdb->methods->expand_file(tdb, expand) == -1) - goto fail; - - /* Write new zone header (at old end). */ - off = old_size; - if (tdb_write_convert(tdb, off, &zhdr, sizeof(zhdr)) == -1) - goto fail; - - /* Now write empty buckets. */ - off += sizeof(zhdr); - if (zero_out(tdb, off, (num_buckets+1) * sizeof(tdb_off_t)) == -1) - goto fail; - off += (num_buckets+1) * sizeof(tdb_off_t); - - /* Now add the rest as our free record. */ - if (add_free_record(tdb, zone_bits, off, expand - overhead(zone_bits)) - == -1) - goto fail; - - /* Try allocating from this zone now. */ - tdb->zone_off = old_size; - tdb->zhdr = zhdr; - -success: + /* We need to drop this lock before adding free record. */ tdb_unlock_expand(tdb, F_WRLCK); - return 0; -fail: - tdb_unlock_expand(tdb, F_WRLCK); - return -1; + return add_free_record(tdb, old_size, wanted * TDB_EXTENSION_FACTOR); } /* This won't fail: it will expand the database if it has to. */ diff --git a/ccan/tdb2/lock.c b/ccan/tdb2/lock.c index 10d1e18a..5d13a23a 100644 --- a/ccan/tdb2/lock.c +++ b/ccan/tdb2/lock.c @@ -257,7 +257,7 @@ static int tdb_nest_lock(struct tdb_context *tdb, tdb_off_t offset, int ltype, { struct tdb_lock_type *new_lck; - if (offset >= TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE + tdb->map_size / 8) { + if (offset > TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE + tdb->map_size / 8) { tdb->ecode = TDB_ERR_LOCK; tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, "tdb_nest_lock: invalid offset %llu ltype=%d\n", diff --git a/ccan/tdb2/private.h b/ccan/tdb2/private.h index 5e71bc2d..70580c46 100644 --- a/ccan/tdb2/private.h +++ b/ccan/tdb2/private.h @@ -92,10 +92,8 @@ typedef uint64_t tdb_off_t; /* And 8 entries in each group, ie 8 groups per sublevel. */ #define TDB_HASH_GROUP_BITS 3 -/* We start with a 64k-sized zone. */ -#define INITIAL_ZONE_BITS 16 -/* Try to create zones at least 32 times larger than allocations. */ -#define TDB_COMFORT_FACTOR_BITS 5 +/* Extend file by least 32 times larger than needed. */ +#define TDB_EXTENSION_FACTOR 32 /* We steal bits from the offsets to store hash info. */ #define TDB_OFF_HASH_GROUP_MASK ((1ULL << TDB_HASH_GROUP_BITS) - 1) @@ -111,14 +109,13 @@ typedef uint64_t tdb_off_t; #define TDB_OFF_MASK \ (((1ULL << (64 - TDB_OFF_UPPER_STEAL)) - 1) - TDB_OFF_HASH_GROUP_MASK) +/* How many buckets in a free list: see size_to_bucket(). */ +#define TDB_FREE_BUCKETS (64 - TDB_OFF_UPPER_STEAL) + /* We have to be able to fit a free record here. */ #define TDB_MIN_DATA_LEN \ (sizeof(struct tdb_free_record) - sizeof(struct tdb_used_record)) -/* We ensure buckets up to size 1 << (zone_bits - TDB_COMFORT_FACTOR_BITS). */ -/* FIXME: test this matches size_to_bucket! */ -#define BUCKETS_FOR_ZONE(zone_bits) ((zone_bits) + 2 - TDB_COMFORT_FACTOR_BITS) - #if !HAVE_BSWAP_64 static inline uint64_t bswap_64(uint64_t x) { @@ -138,8 +135,7 @@ struct tdb_used_record { magic: 16, (highest) key_len_bits: 5, extra_padding: 32 - hash_bits: 5, - zone_bits: 6 (lowest) + hash_bits: 11 */ uint64_t magic_and_meta; /* The bottom key_len_bits*2 are key length, rest is data length. */ @@ -166,14 +162,9 @@ static inline uint64_t rec_extra_padding(const struct tdb_used_record *r) return (r->magic_and_meta >> 11) & 0xFFFFFFFF; } -static inline unsigned int rec_zone_bits(const struct tdb_used_record *r) -{ - return r->magic_and_meta & ((1 << 6) - 1); -} - static inline uint32_t rec_hash(const struct tdb_used_record *r) { - return (r->magic_and_meta >> 6) & ((1 << 5) - 1); + return r->magic_and_meta & ((1 << 11) - 1); } static inline uint16_t rec_magic(const struct tdb_used_record *r) @@ -182,32 +173,17 @@ static inline uint16_t rec_magic(const struct tdb_used_record *r) } struct tdb_free_record { - uint64_t magic_and_meta; /* Bottom 6 bits are zone bits. */ + uint64_t magic_and_meta; uint64_t data_len; /* Not counting these two fields. */ /* This is why the minimum record size is 16 bytes. */ uint64_t next, prev; }; -static inline unsigned int frec_zone_bits(const struct tdb_free_record *f) -{ - return f->magic_and_meta & ((1 << 6) - 1); -} - static inline uint64_t frec_magic(const struct tdb_free_record *f) { - return f->magic_and_meta & ~((1ULL << 6) - 1); + return f->magic_and_meta; } -/* Each zone has its set of free lists at the head. - * - * For each zone we have a series of per-size buckets, and a final bucket for - * "too big". */ -struct free_zone_header { - /* How much does this zone cover? */ - uint64_t zone_bits; - /* tdb_off_t buckets[free_buckets + 1] */ -}; - /* this is stored at the front of every database */ struct tdb_header { char magic_food[64]; /* for /etc/magic */ @@ -215,13 +191,19 @@ struct tdb_header { uint64_t version; /* version of the code */ uint64_t hash_test; /* result of hashing HASH_MAGIC. */ uint64_t hash_seed; /* "random" seed written at creation time. */ + tdb_off_t free_list; /* (First) free list. */ - tdb_off_t reserved[28]; + tdb_off_t reserved[27]; /* Top level hash table. */ tdb_off_t hashtable[1ULL << TDB_TOPLEVEL_HASH_BITS]; }; +struct tdb_freelist { + struct tdb_used_record hdr; + tdb_off_t buckets[TDB_FREE_BUCKETS]; +}; + /* Information about a particular (locked) hash entry. */ struct hash_info { /* Full hash value of entry. */ @@ -308,10 +290,8 @@ struct tdb_context { /* Set if we are in a transaction. */ struct tdb_transaction *transaction; - /* What zone of the tdb to use, for spreading load. */ - uint64_t zone_off; - /* Cached copy of zone header. */ - struct free_zone_header zhdr; + /* What freelist are we using? */ + uint64_t flist_off; /* IO methods: changes for transactions. */ const struct tdb_methods *methods; @@ -368,26 +348,25 @@ int delete_from_hash(struct tdb_context *tdb, struct hash_info *h); bool is_subhash(tdb_off_t val); /* free.c: */ -int tdb_zone_init(struct tdb_context *tdb); +int tdb_flist_init(struct tdb_context *tdb); /* If this fails, try tdb_expand. */ tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen, uint64_t hash, bool growing); /* Put this record in a free list. */ -int add_free_record(struct tdb_context *tdb, unsigned int zone_bits, +int add_free_record(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len_with_header); /* Set up header for a used record. */ int set_header(struct tdb_context *tdb, struct tdb_used_record *rec, uint64_t keylen, uint64_t datalen, - uint64_t actuallen, unsigned hashlow, - unsigned int zone_bits); + uint64_t actuallen, unsigned hashlow); /* Used by tdb_check to verify. */ -unsigned int size_to_bucket(unsigned int free_buckets, tdb_len_t data_len); -tdb_off_t bucket_off(tdb_off_t zone_off, tdb_off_t bucket); +unsigned int size_to_bucket(tdb_len_t data_len); +tdb_off_t bucket_off(tdb_off_t flist_off, unsigned bucket); /* io.c: */ /* Initialize tdb->methods. */ diff --git a/ccan/tdb2/summary.c b/ccan/tdb2/summary.c index 3df822ba..dd6fa399 100644 --- a/ccan/tdb2/summary.c +++ b/ccan/tdb2/summary.c @@ -19,25 +19,6 @@ #include #include -static void sizes_for_bucket(unsigned bucket, size_t *min, size_t *max) -{ - if (bucket <= 8) { - *min = *max = TDB_MIN_DATA_LEN + bucket * 8; - } else if (bucket == 9) { - /* FIXME: This is twisted; fix size_to_bucket. */ - *min = TDB_MIN_DATA_LEN + (1ULL << (bucket - 3)) + 8; - *max = TDB_MIN_DATA_LEN + (1ULL << (bucket - 2)) - 8; - } else { - *min = TDB_MIN_DATA_LEN + (1ULL << (bucket - 3)); - *max = TDB_MIN_DATA_LEN + (1ULL << (bucket - 2)) - 8; - } - assert(size_to_bucket(63, *min) == bucket); - assert(size_to_bucket(63, *max) == bucket); - if (bucket > 8) - assert(size_to_bucket(63, *min - 8) == bucket - 1); - assert(size_to_bucket(63, *max + 8) == bucket + 1); -} - static int count_hash(struct tdb_context *tdb, tdb_off_t hash_off, unsigned bits) { @@ -54,48 +35,32 @@ static int count_hash(struct tdb_context *tdb, return count; } -static tdb_len_t summarize_zone(struct tdb_context *tdb, tdb_off_t zone_off, - struct tally *zones, - struct tally *hashes, - struct tally *free, - struct tally *keys, - struct tally *data, - struct tally *extra, - struct tally *uncoal, - uint64_t bucketlen[], - unsigned int *num_buckets) +static bool summarize(struct tdb_context *tdb, + struct tally *hashes, + struct tally *flists, + struct tally *free, + struct tally *keys, + struct tally *data, + struct tally *extra, + struct tally *uncoal, + struct tally *buckets) { - struct free_zone_header zhdr; - tdb_off_t off, end; + tdb_off_t off; tdb_len_t len; - unsigned int hdrlen; tdb_len_t unc = 0; - if (tdb_read_convert(tdb, zone_off, &zhdr, sizeof(zhdr)) == -1) - return TDB_OFF_ERR; - - tally_add(zones, 1ULL << zhdr.zone_bits); - *num_buckets = BUCKETS_FOR_ZONE(zhdr.zone_bits); - - hdrlen = sizeof(zhdr) - + (BUCKETS_FOR_ZONE(zhdr.zone_bits) + 1) * sizeof(tdb_off_t); - - end = zone_off + (1ULL << zhdr.zone_bits); - if (end > tdb->map_size) - end = tdb->map_size; - - for (off = zone_off + hdrlen; off < end; off += len) { + for (off = sizeof(struct tdb_header); off < tdb->map_size; off += len) { union { struct tdb_used_record u; struct tdb_free_record f; } pad, *p; p = tdb_get(tdb, off, &pad, sizeof(pad)); if (!p) - return TDB_OFF_ERR; + return false; if (rec_magic(&p->u) != TDB_MAGIC) { len = p->f.data_len; tally_add(free, len); - bucketlen[size_to_bucket(frec_zone_bits(&p->f), len)]++; + tally_add(buckets, size_to_bucket(len)); len += sizeof(p->u); unc++; } else { @@ -108,13 +73,19 @@ static tdb_len_t summarize_zone(struct tdb_context *tdb, tdb_off_t zone_off, + rec_data_length(&p->u) + rec_extra_padding(&p->u); - /* FIXME: Use different magic for hashes? */ - if (!rec_key_length(&p->u) && !rec_hash(&p->u)) { - int count = count_hash(tdb, off + sizeof(p->u), - TDB_SUBLEVEL_HASH_BITS); - if (count == -1) - return TDB_OFF_ERR; - tally_add(hashes, count); + /* FIXME: Use different magic for hashes, flists. */ + if (!rec_key_length(&p->u) && rec_hash(&p->u) < 2) { + if (rec_hash(&p->u) == 0) { + int count = count_hash(tdb, + off + sizeof(p->u), + TDB_SUBLEVEL_HASH_BITS); + if (count == -1) + return false; + tally_add(hashes, count); + } else { + tally_add(flists, + rec_data_length(&p->u)); + } } else { tally_add(keys, rec_key_length(&p->u)); tally_add(data, rec_data_length(&p->u)); @@ -124,13 +95,11 @@ static tdb_len_t summarize_zone(struct tdb_context *tdb, tdb_off_t zone_off, } if (unc) tally_add(uncoal, unc); - return 1ULL << zhdr.zone_bits; + return true; } #define SUMMARY_FORMAT \ "Size of file/data: %zu/%zu\n" \ - "Number of zones: %zu\n" \ - "Smallest/average/largest zone size: %zu/%zu/%zu\n%s" \ "Number of records: %zu\n" \ "Smallest/average/largest keys: %zu/%zu/%zu\n%s" \ "Smallest/average/largest data: %zu/%zu/%zu\n%s" \ @@ -139,10 +108,11 @@ static tdb_len_t summarize_zone(struct tdb_context *tdb, tdb_off_t zone_off, "Smallest/average/largest free records: %zu/%zu/%zu\n%s" \ "Number of uncoalesced records: %zu\n" \ "Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n%s" \ + "Number of free lists: %zu\n%s" \ "Toplevel hash used: %u of %u\n" \ "Number of subhashes: %zu\n" \ "Smallest/average/largest subhash entries: %zu/%zu/%zu\n%s" \ - "Percentage keys/data/padding/free/rechdrs/zonehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n" + "Percentage keys/data/padding/free/rechdrs/freehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n" #define BUCKET_SUMMARY_FORMAT_A \ "Free bucket %zu: total entries %zu.\n" \ @@ -156,17 +126,13 @@ static tdb_len_t summarize_zone(struct tdb_context *tdb, tdb_off_t zone_off, char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags) { - tdb_off_t off; tdb_len_t len; - unsigned int i, num_buckets, max_bucket = 0; - uint64_t total_buckets = 0; - struct tally *zones, *hashes, *freet, *keys, *data, *extra, *uncoal, - *buckets[BUCKETS_FOR_ZONE(63)+1] = { NULL }; - char *zonesg, *hashesg, *freeg, *keysg, *datag, *extrag, *uncoalg, - *bucketsg[BUCKETS_FOR_ZONE(63)+1] = { NULL }; + struct tally *flists, *hashes, *freet, *keys, *data, *extra, *uncoal, + *buckets; + char *hashesg, *freeg, *keysg, *datag, *extrag, *uncoalg, *bucketsg; char *ret = NULL; - zonesg = hashesg = freeg = keysg = datag = extrag = uncoalg = NULL; + hashesg = freeg = keysg = datag = extrag = uncoalg = bucketsg = NULL; if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false) != 0) return NULL; @@ -177,71 +143,43 @@ char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags) } /* Start stats off empty. */ - zones = tally_new(HISTO_HEIGHT); + flists = tally_new(HISTO_HEIGHT); hashes = tally_new(HISTO_HEIGHT); freet = tally_new(HISTO_HEIGHT); keys = tally_new(HISTO_HEIGHT); data = tally_new(HISTO_HEIGHT); extra = tally_new(HISTO_HEIGHT); uncoal = tally_new(HISTO_HEIGHT); - if (!zones || !hashes || !freet || !keys || !data || !extra - || !uncoal) { + buckets = tally_new(HISTO_HEIGHT); + if (!flists || !hashes || !freet || !keys || !data || !extra + || !uncoal || !buckets) { tdb->ecode = TDB_ERR_OOM; goto unlock; } - for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) { - buckets[i] = tally_new(HISTO_HEIGHT); - if (!buckets[i]) { - tdb->ecode = TDB_ERR_OOM; - goto unlock; - } - } - - for (off = sizeof(struct tdb_header); - off < tdb->map_size - 1; - off += len) { - uint64_t bucketlen[BUCKETS_FOR_ZONE(63)+1] = { 0 }; - len = summarize_zone(tdb, off, zones, hashes, freet, keys, - data, extra, uncoal, bucketlen, - &num_buckets); - if (len == TDB_OFF_ERR) - goto unlock; - for (i = 0; i < num_buckets; i++) - tally_add(buckets[i], bucketlen[i]); - if (num_buckets > max_bucket) - max_bucket = num_buckets; - total_buckets += num_buckets; - } + if (!summarize(tdb, hashes, flists, freet, keys, data, extra, uncoal, + buckets)) + goto unlock; if (flags & TDB_SUMMARY_HISTOGRAMS) { - zonesg = tally_histogram(zones, HISTO_WIDTH, HISTO_HEIGHT); hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT); freeg = tally_histogram(freet, HISTO_WIDTH, HISTO_HEIGHT); keysg = tally_histogram(keys, HISTO_WIDTH, HISTO_HEIGHT); datag = tally_histogram(data, HISTO_WIDTH, HISTO_HEIGHT); extrag = tally_histogram(extra, HISTO_WIDTH, HISTO_HEIGHT); uncoalg = tally_histogram(uncoal, HISTO_WIDTH, HISTO_HEIGHT); - for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) { - bucketsg[i] = tally_histogram(buckets[i], - HISTO_WIDTH, - HISTO_HEIGHT); - } + bucketsg = tally_histogram(buckets, HISTO_WIDTH, HISTO_HEIGHT); } /* 20 is max length of a %llu. */ len = strlen(SUMMARY_FORMAT) + 33*20 + 1 - + (zonesg ? strlen(zonesg) : 0) + (hashesg ? strlen(hashesg) : 0) + (freeg ? strlen(freeg) : 0) + (keysg ? strlen(keysg) : 0) + (datag ? strlen(datag) : 0) + (extrag ? strlen(extrag) : 0) - + (uncoalg ? strlen(uncoalg) : 0); - for (i = 0; i < max_bucket; i++) { - len += strlen(BUCKET_SUMMARY_FORMAT_B) + 6 * 20 - + (bucketsg[i] ? strlen(bucketsg[i]) : 0); - } + + (uncoalg ? strlen(uncoalg) : 0) + + (bucketsg ? strlen(bucketsg) : 0); ret = malloc(len); if (!ret) @@ -250,9 +188,6 @@ char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags) len = sprintf(ret, SUMMARY_FORMAT, (size_t)tdb->map_size, tally_num(keys) + tally_num(data), - tally_num(zones), - tally_min(zones), tally_mean(zones), tally_max(zones), - zonesg ? zonesg : "", tally_num(keys), tally_min(keys), tally_mean(keys), tally_max(keys), keysg ? keysg : "", @@ -266,6 +201,8 @@ char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags) tally_total(uncoal, NULL), tally_min(uncoal), tally_mean(uncoal), tally_max(uncoal), uncoalg ? uncoalg : "", + tally_num(buckets), + bucketsg ? bucketsg : "", count_hash(tdb, offsetof(struct tdb_header, hashtable), TDB_TOPLEVEL_HASH_BITS), 1 << TDB_TOPLEVEL_HASH_BITS, @@ -278,53 +215,28 @@ char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags) tally_total(freet, NULL) * 100.0 / tdb->map_size, (tally_num(keys) + tally_num(freet) + tally_num(hashes)) * sizeof(struct tdb_used_record) * 100.0 / tdb->map_size, - (tally_num(zones) * sizeof(struct free_zone_header) - + total_buckets * sizeof(tdb_off_t)) + tally_num(flists) * sizeof(struct tdb_freelist) * 100.0 / tdb->map_size, (tally_num(hashes) * (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) + (sizeof(tdb_off_t) << TDB_TOPLEVEL_HASH_BITS)) * 100.0 / tdb->map_size); - for (i = 0; i < max_bucket; i++) { - size_t min, max; - sizes_for_bucket(i, &min, &max); - if (min == max) { - len += sprintf(ret + len, BUCKET_SUMMARY_FORMAT_A, - min, tally_total(buckets[i], NULL), - tally_min(buckets[i]), - tally_mean(buckets[i]), - tally_max(buckets[i]), - bucketsg[i] ? bucketsg[i] : ""); - } else { - len += sprintf(ret + len, BUCKET_SUMMARY_FORMAT_B, - min, max, tally_total(buckets[i], NULL), - tally_min(buckets[i]), - tally_mean(buckets[i]), - tally_max(buckets[i]), - bucketsg[i] ? bucketsg[i] : ""); - } - } - unlock: - free(zonesg); free(hashesg); free(freeg); free(keysg); free(datag); free(extrag); free(uncoalg); - free(zones); + free(bucketsg); free(hashes); + free(buckets); free(freet); free(keys); free(data); free(extra); free(uncoal); - for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) { - free(buckets[i]); - free(bucketsg[i]); - } tdb_allrecord_unlock(tdb, F_RDLCK); tdb_unlock_expand(tdb, F_RDLCK); diff --git a/ccan/tdb2/tdb.c b/ccan/tdb2/tdb.c index 79c76b19..c0561fb9 100644 --- a/ccan/tdb2/tdb.c +++ b/ccan/tdb2/tdb.c @@ -80,9 +80,7 @@ static uint64_t random_number(struct tdb_context *tdb) struct new_database { struct tdb_header hdr; - /* Initial free zone. */ - struct free_zone_header zhdr; - tdb_off_t free[BUCKETS_FOR_ZONE(INITIAL_ZONE_BITS) + 1]; + struct tdb_freelist flist; }; /* initialise a new database */ @@ -110,8 +108,10 @@ static int tdb_new_database(struct tdb_context *tdb, memset(newdb.hdr.hashtable, 0, sizeof(newdb.hdr.hashtable)); /* Free is empty. */ - newdb.zhdr.zone_bits = INITIAL_ZONE_BITS; - memset(newdb.free, 0, sizeof(newdb.free)); + newdb.hdr.free_list = offsetof(struct new_database, flist); + memset(&newdb.flist, 0, sizeof(newdb.flist)); + set_header(NULL, &newdb.flist.hdr, 0, + sizeof(newdb.flist.buckets), sizeof(newdb.flist.buckets), 1); /* Magic food */ memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food)); @@ -176,7 +176,6 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags, tdb->log_priv = NULL; tdb->transaction = NULL; tdb_hash_init(tdb); - /* last_zone will be set below. */ tdb_io_init(tdb); tdb_lock_init(tdb); @@ -230,7 +229,7 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags, } tdb_convert(tdb, &hdr.hash_seed, sizeof(hdr.hash_seed)); tdb->hash_seed = hdr.hash_seed; - tdb_zone_init(tdb); + tdb_flist_init(tdb); return tdb; } @@ -314,8 +313,7 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags, /* This make sure we have current map_size and mmap. */ tdb->methods->oob(tdb, tdb->map_size + 1, true); - /* Now we can pick a random free zone to start from. */ - if (tdb_zone_init(tdb) == -1) + if (tdb_flist_init(tdb) == -1) goto fail; tdb->next = tdbs; @@ -358,8 +356,7 @@ static int update_rec_hdr(struct tdb_context *tdb, { uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec); - if (set_header(tdb, rec, keylen, datalen, keylen + dataroom, h, - rec_zone_bits(rec))) + if (set_header(tdb, rec, keylen, datalen, keylen + dataroom, h)) return -1; return tdb_write_convert(tdb, off, rec, sizeof(*rec)); @@ -370,7 +367,6 @@ static int replace_data(struct tdb_context *tdb, struct hash_info *h, struct tdb_data key, struct tdb_data dbuf, tdb_off_t old_off, tdb_len_t old_room, - unsigned old_zone, bool growing) { tdb_off_t new_off; @@ -382,7 +378,7 @@ static int replace_data(struct tdb_context *tdb, /* We didn't like the existing one: remove it. */ if (old_off) { - add_free_record(tdb, old_zone, old_off, + add_free_record(tdb, old_off, sizeof(struct tdb_used_record) + key.dsize + old_room); if (replace_in_hash(tdb, h, new_off) == -1) @@ -454,8 +450,7 @@ int tdb_store(struct tdb_context *tdb, } /* If we didn't use the old record, this implies we're growing. */ - ret = replace_data(tdb, &h, key, dbuf, off, old_room, - rec_zone_bits(&rec), off != 0); + ret = replace_data(tdb, &h, key, dbuf, off, old_room, off != 0); tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK); return ret; @@ -524,8 +519,7 @@ int tdb_append(struct tdb_context *tdb, } /* If they're using tdb_append(), it implies they're growing record. */ - ret = replace_data(tdb, &h, key, new_dbuf, off, - old_room, rec_zone_bits(&rec), true); + ret = replace_data(tdb, &h, key, new_dbuf, off, old_room, true); tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK); free(newdata); @@ -580,7 +574,7 @@ int tdb_delete(struct tdb_context *tdb, struct tdb_data key) goto unlock_err; /* Free the deleted entry. */ - if (add_free_record(tdb, rec_zone_bits(&rec), off, + if (add_free_record(tdb, off, sizeof(struct tdb_used_record) + rec_key_length(&rec) + rec_data_length(&rec) diff --git a/ccan/tdb2/test/layout.c b/ccan/tdb2/test/layout.c index 02afdd23..67a6633d 100644 --- a/ccan/tdb2/test/layout.c +++ b/ccan/tdb2/test/layout.c @@ -23,15 +23,10 @@ static void add(struct tdb_layout *layout, union tdb_layout_elem elem) layout->elem[layout->num_elems++] = elem; } -void tdb_layout_add_zone(struct tdb_layout *layout, - unsigned int zone_bits, - bool fill_prev) +void tdb_layout_add_freelist(struct tdb_layout *layout) { union tdb_layout_elem elem; - if (fill_prev) - tdb_layout_add_free(layout, 0); - elem.base.type = ZONE; - elem.zone.zone_bits = zone_bits; + elem.base.type = FREELIST; add(layout, elem); } @@ -85,10 +80,9 @@ static tdb_len_t hashtable_len(struct tle_hashtable *htable) + htable->extra; } -static tdb_len_t zone_header_len(struct tle_zone *zone) +static tdb_len_t freelist_len(struct tle_freelist *flist) { - return sizeof(struct free_zone_header) - + sizeof(tdb_off_t) * (BUCKETS_FOR_ZONE(zone->zone_bits)+1); + return sizeof(struct tdb_freelist); } static void set_free_record(void *mem, tdb_len_t len) @@ -97,47 +91,43 @@ static void set_free_record(void *mem, tdb_len_t len) } static void set_data_record(void *mem, struct tdb_context *tdb, - struct tle_zone *last_zone, struct tle_used *used) { struct tdb_used_record *u = mem; set_header(tdb, u, used->key.dsize, used->data.dsize, used->key.dsize + used->data.dsize + used->extra, - tdb_hash(tdb, used->key.dptr, used->key.dsize), - last_zone->zone_bits); + tdb_hash(tdb, used->key.dptr, used->key.dsize)); memcpy(u + 1, used->key.dptr, used->key.dsize); memcpy((char *)(u + 1) + used->key.dsize, used->data.dptr, used->data.dsize); } static void set_hashtable(void *mem, struct tdb_context *tdb, - struct tle_zone *last_zone, struct tle_hashtable *htable) { struct tdb_used_record *u = mem; tdb_len_t len = sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS; - set_header(tdb, u, 0, len, len + htable->extra, 0, - last_zone->zone_bits); + set_header(tdb, u, 0, len, len + htable->extra, 0); memset(u + 1, 0, len); } -static void set_zone(void *mem, struct tdb_context *tdb, - struct tle_zone *zone) +static void set_freelist(void *mem, struct tdb_context *tdb, + struct tle_freelist *freelist) { - struct free_zone_header *fz = mem; - memset(fz, 0, zone_header_len(zone)); - fz->zone_bits = zone->zone_bits; + struct tdb_freelist *flist = mem; + memset(flist, 0, sizeof(*flist)); + set_header(tdb, &flist->hdr, 0, + sizeof(*flist) - sizeof(flist->hdr), + sizeof(*flist) - sizeof(flist->hdr), 1); } static void add_to_freetable(struct tdb_context *tdb, - struct tle_zone *last_zone, tdb_off_t eoff, tdb_off_t elen) { - add_free_record(tdb, last_zone->zone_bits, eoff, - sizeof(struct tdb_used_record) + elen); + add_free_record(tdb, eoff, sizeof(struct tdb_used_record) + elen); } static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned ingroup) @@ -204,15 +194,10 @@ static void add_to_hashtable(struct tdb_context *tdb, struct tdb_context *tdb_layout_get(struct tdb_layout *layout) { unsigned int i; - tdb_off_t off, len; - tdb_len_t zone_left; + tdb_off_t off, len, flist_off = 0; char *mem; struct tdb_context *tdb; - struct tle_zone *last_zone = NULL; - assert(layout->elem[0].base.type == ZONE); - - zone_left = 0; off = sizeof(struct tdb_header); /* First pass of layout: calc lengths */ @@ -220,15 +205,12 @@ struct tdb_context *tdb_layout_get(struct tdb_layout *layout) union tdb_layout_elem *e = &layout->elem[i]; e->base.off = off; switch (e->base.type) { - case ZONE: - assert(zone_left == 0); - len = zone_header_len(&e->zone); - zone_left = 1ULL << e->zone.zone_bits; + case FREELIST: + assert(flist_off == 0); + flist_off = off; + len = freelist_len(&e->flist); break; case FREE: - if (e->free.len == 0) - e->free.len = zone_left - - sizeof(struct tdb_used_record); len = free_record_len(e->free.len); break; case DATA: @@ -241,9 +223,9 @@ struct tdb_context *tdb_layout_get(struct tdb_layout *layout) abort(); } off += len; - assert(zone_left >= len); - zone_left -= len; } + /* Must have a free list! */ + assert(flist_off); mem = malloc(off); /* Now populate our header, cribbing from a real TDB header. */ @@ -254,24 +236,22 @@ struct tdb_context *tdb_layout_get(struct tdb_layout *layout) free(tdb->map_ptr); tdb->map_ptr = mem; tdb->map_size = off; + tdb->flist_off = flist_off; for (i = 0; i < layout->num_elems; i++) { union tdb_layout_elem *e = &layout->elem[i]; switch (e->base.type) { - case ZONE: - set_zone(mem + e->base.off, tdb, &e->zone); - last_zone = &e->zone; + case FREELIST: + set_freelist(mem + e->base.off, tdb, &e->flist); break; case FREE: set_free_record(mem + e->base.off, e->free.len); break; case DATA: - set_data_record(mem + e->base.off, tdb, last_zone, - &e->used); + set_data_record(mem + e->base.off, tdb, &e->used); break; case HASHTABLE: - set_hashtable(mem + e->base.off, tdb, last_zone, - &e->hashtable); + set_hashtable(mem + e->base.off, tdb, &e->hashtable); break; } } @@ -280,12 +260,8 @@ struct tdb_context *tdb_layout_get(struct tdb_layout *layout) for (i = 0; i < layout->num_elems; i++) { union tdb_layout_elem *e = &layout->elem[i]; switch (e->base.type) { - case ZONE: - last_zone = &e->zone; - break; case FREE: - add_to_freetable(tdb, last_zone, - e->base.off, e->free.len); + add_to_freetable(tdb, e->base.off, e->free.len); break; case DATA: add_to_hashtable(tdb, e->base.off, e->used.key); diff --git a/ccan/tdb2/test/layout.h b/ccan/tdb2/test/layout.h index 6cbf3d0d..216fe297 100644 --- a/ccan/tdb2/test/layout.h +++ b/ccan/tdb2/test/layout.h @@ -3,9 +3,7 @@ #include struct tdb_layout *new_tdb_layout(const char *filename); -void tdb_layout_add_zone(struct tdb_layout *layout, - unsigned int zone_bits, - bool fill_prev); +void tdb_layout_add_freelist(struct tdb_layout *layout); void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len); void tdb_layout_add_used(struct tdb_layout *layout, TDB_DATA key, TDB_DATA data, @@ -19,7 +17,7 @@ void tdb_layout_add_hashtable(struct tdb_layout *layout, struct tdb_context *tdb_layout_get(struct tdb_layout *layout); enum layout_type { - ZONE, FREE, DATA, HASHTABLE, + FREELIST, FREE, DATA, HASHTABLE, }; /* Shared by all union members. */ @@ -28,9 +26,8 @@ struct tle_base { tdb_off_t off; }; -struct tle_zone { +struct tle_freelist { struct tle_base base; - unsigned int zone_bits; }; struct tle_free { @@ -54,7 +51,7 @@ struct tle_hashtable { union tdb_layout_elem { struct tle_base base; - struct tle_zone zone; + struct tle_freelist flist; struct tle_free free; struct tle_used used; struct tle_hashtable hashtable; diff --git a/ccan/tdb2/test/run-001-encode.c b/ccan/tdb2/test/run-001-encode.c index f04be339..7a4fc06e 100644 --- a/ccan/tdb2/test/run-001-encode.c +++ b/ccan/tdb2/test/run-001-encode.c @@ -13,18 +13,16 @@ int main(int argc, char *argv[]) struct tdb_used_record rec; struct tdb_context tdb = { .log = tap_log_fn, .log_priv = NULL }; - plan_tests(64 + 32 + 48*7 + 1); + plan_tests(64 + 32 + 48*6 + 1); /* We should be able to encode any data value. */ for (i = 0; i < 64; i++) - ok1(set_header(&tdb, &rec, 0, 1ULL << i, 1ULL << i, 0, 0) - == 0); + ok1(set_header(&tdb, &rec, 0, 1ULL << i, 1ULL << i, 0) == 0); /* And any key and data with < 64 bits between them. */ for (i = 0; i < 32; i++) { tdb_len_t dlen = 1ULL >> (63 - i), klen = 1ULL << i; - ok1(set_header(&tdb, &rec, klen, dlen, klen + dlen, 0, 0) - == 0); + ok1(set_header(&tdb, &rec, klen, dlen, klen + dlen, 0) == 0); } /* We should neatly encode all values. */ @@ -33,15 +31,12 @@ int main(int argc, char *argv[]) uint64_t klen = 1ULL << (i < 16 ? i : 15); uint64_t dlen = 1ULL << i; uint64_t xlen = 1ULL << (i < 32 ? i : 31); - uint64_t zbits = 1ULL << (i < 6 ? i : 5); - ok1(set_header(&tdb, &rec, klen, dlen, klen + dlen + xlen, h, - zbits) + ok1(set_header(&tdb, &rec, klen, dlen, klen + dlen + xlen, h) == 0); ok1(rec_key_length(&rec) == klen); ok1(rec_data_length(&rec) == dlen); ok1(rec_extra_padding(&rec) == xlen); ok1((uint64_t)rec_hash(&rec) == h); - ok1(rec_zone_bits(&rec) == zbits); ok1(rec_magic(&rec) == TDB_MAGIC); } ok1(tap_log_messages == 0); diff --git a/ccan/tdb2/test/run-01-zones.c b/ccan/tdb2/test/run-01-zones.c deleted file mode 100644 index 405bd453..00000000 --- a/ccan/tdb2/test/run-01-zones.c +++ /dev/null @@ -1,62 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include "logging.h" -#include "layout.h" - -/* Calculate start of zone offset from layout directly. */ -static tdb_off_t layout_zone_off(tdb_off_t off, struct tdb_layout *layout) -{ - unsigned int i; - - /* Every second one is a free entry, so divide by 2 to get zone */ - for (i = 0; i < layout->num_elems; i++) { - if (layout->elem[i].base.type != ZONE) - continue; - if (layout->elem[i].base.off - + (1ULL << layout->elem[i].zone.zone_bits) > off) - return layout->elem[i].base.off; - } - abort(); -} - -int main(int argc, char *argv[]) -{ - struct tdb_context *tdb; - struct tdb_layout *layout; - struct free_zone_header zhdr; - tdb_off_t off, step; - unsigned int i; - - /* FIXME: Test TDB_CONVERT */ - - plan_tests(3 + 100); - - /* No coalescing can be done due to EOF */ - layout = new_tdb_layout(NULL); - tdb_layout_add_zone(layout, INITIAL_ZONE_BITS, false); - tdb_layout_add_zone(layout, INITIAL_ZONE_BITS, true); - tdb_layout_add_zone(layout, INITIAL_ZONE_BITS+1, true); - tdb_layout_add_zone(layout, INITIAL_ZONE_BITS+2, true); - tdb_layout_add_zone(layout, INITIAL_ZONE_BITS+2, true); - tdb = tdb_layout_get(layout); - - ok1(tdb_check(tdb, NULL, NULL) == 0); - - /* Last zone should get right zone. */ - ok1(last_zone(tdb, &zhdr) - == layout->elem[layout->num_elems-1].base.off); - ok1(zhdr.zone_bits == INITIAL_ZONE_BITS+2); - - off = sizeof(struct tdb_header); - step = (tdb->map_size - 1 - off) / 100; - for (i = 0; i < 100; i++, off += step) { - ok1(off_to_zone(tdb, off, &zhdr) == layout_zone_off(off, layout)); - } - - return exit_status(); -} diff --git a/ccan/tdb2/test/run-02-expand.c b/ccan/tdb2/test/run-02-expand.c index b681c05c..aa5d5679 100644 --- a/ccan/tdb2/test/run-02-expand.c +++ b/ccan/tdb2/test/run-02-expand.c @@ -16,7 +16,7 @@ int main(int argc, char *argv[]) TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT }; - plan_tests(sizeof(flags) / sizeof(flags[0]) * 18 + 1); + plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1); for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { tdb = tdb_open("run-expand.tdb", flags[i], @@ -25,47 +25,15 @@ int main(int argc, char *argv[]) if (!tdb) continue; - /* First expand. Should not fill zone. */ - val = tdb->map_size - sizeof(struct tdb_header); + val = tdb->map_size; ok1(tdb_expand(tdb, 1) == 0); - ok1(tdb->map_size < sizeof(struct tdb_header) - + (1 << INITIAL_ZONE_BITS)); + ok1(tdb->map_size >= val + 1 * TDB_EXTENSION_FACTOR); ok1(tdb_check(tdb, NULL, NULL) == 0); - /* Fill zone. */ - val = (1<map_size - sizeof(struct tdb_header)); - ok1(tdb_expand(tdb, val) == 0); - ok1(tdb->map_size == sizeof(struct tdb_header) - + (1 << INITIAL_ZONE_BITS)); + val = tdb->map_size; + ok1(tdb_expand(tdb, 1024) == 0); + ok1(tdb->map_size >= val + 1024 * TDB_EXTENSION_FACTOR); ok1(tdb_check(tdb, NULL, NULL) == 0); - - /* Second expand, adds another zone of same size. */ - ok1(tdb_expand(tdb, 4 << INITIAL_ZONE_BITS) == 0); - ok1(tdb->map_size == - (2<map_size == - (4<map_size == - (8<> TDB_COMFORT_FACTOR_BITS) - - sizeof(struct tdb_used_record)) == 0); - ok1(tdb->map_size < (12<elem[0].base.off; ok1(tdb_check(tdb, NULL, NULL) == 0); ok1(free_record_length(tdb, layout->elem[1].base.off) == len); /* Figure out which bucket free entry is. */ - b_off = bucket_off(zone_off, size_to_bucket(zone_bits, len)); + b_off = bucket_off(tdb->flist_off, size_to_bucket(len)); /* Lock and fail to coalesce. */ ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0); - ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off, - b_off, len) == 0); + ok1(coalesce(tdb, layout->elem[1].base.off, b_off, len) == 0); tdb_unlock_free_bucket(tdb, b_off); ok1(free_record_length(tdb, layout->elem[1].base.off) == len); ok1(tdb_check(tdb, NULL, NULL) == 0); @@ -59,20 +56,18 @@ int main(int argc, char *argv[]) /* No coalescing can be done due to used record */ layout = new_tdb_layout(NULL); - tdb_layout_add_zone(layout, zone_bits, false); + tdb_layout_add_freelist(layout); tdb_layout_add_free(layout, 1024); tdb_layout_add_used(layout, key, data, 6); tdb = tdb_layout_get(layout); - zone_off = layout->elem[0].base.off; ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024); ok1(tdb_check(tdb, NULL, NULL) == 0); /* Figure out which bucket free entry is. */ - b_off = bucket_off(zone_off, size_to_bucket(zone_bits, 1024)); + b_off = bucket_off(tdb->flist_off, size_to_bucket(1024)); /* Lock and fail to coalesce. */ ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0); - ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off, - b_off, 1024) == 0); + ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024) == 0); tdb_unlock_free_bucket(tdb, b_off); ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024); ok1(tdb_check(tdb, NULL, NULL) == 0); @@ -80,21 +75,19 @@ int main(int argc, char *argv[]) /* Coalescing can be done due to two free records, then EOF */ layout = new_tdb_layout(NULL); - tdb_layout_add_zone(layout, zone_bits, false); + tdb_layout_add_freelist(layout); tdb_layout_add_free(layout, 1024); tdb_layout_add_free(layout, 2048); tdb = tdb_layout_get(layout); - zone_off = layout->elem[0].base.off; ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024); ok1(free_record_length(tdb, layout->elem[2].base.off) == 2048); ok1(tdb_check(tdb, NULL, NULL) == 0); /* Figure out which bucket (first) free entry is. */ - b_off = bucket_off(zone_off, size_to_bucket(zone_bits, 1024)); + b_off = bucket_off(tdb->flist_off, size_to_bucket(1024)); /* Lock and coalesce. */ ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0); - ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off, - b_off, 1024) == 1); + ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024) == 1); ok1(!tdb_has_locks(tdb)); ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024 + sizeof(struct tdb_used_record) + 2048); @@ -103,22 +96,20 @@ int main(int argc, char *argv[]) /* Coalescing can be done due to two free records, then data */ layout = new_tdb_layout(NULL); - tdb_layout_add_zone(layout, zone_bits, false); + tdb_layout_add_freelist(layout); tdb_layout_add_free(layout, 1024); tdb_layout_add_free(layout, 512); tdb_layout_add_used(layout, key, data, 6); tdb = tdb_layout_get(layout); - zone_off = layout->elem[0].base.off; ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024); ok1(free_record_length(tdb, layout->elem[2].base.off) == 512); ok1(tdb_check(tdb, NULL, NULL) == 0); /* Figure out which bucket free entry is. */ - b_off = bucket_off(zone_off, size_to_bucket(zone_bits, 1024)); + b_off = bucket_off(tdb->flist_off, size_to_bucket(1024)); /* Lock and coalesce. */ ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0); - ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off, - b_off, 1024) == 1); + ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024) == 1); ok1(!tdb_has_locks(tdb)); ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024 + sizeof(struct tdb_used_record) + 512); @@ -127,23 +118,21 @@ int main(int argc, char *argv[]) /* Coalescing can be done due to three free records, then EOF */ layout = new_tdb_layout(NULL); - tdb_layout_add_zone(layout, zone_bits, false); + tdb_layout_add_freelist(layout); tdb_layout_add_free(layout, 1024); tdb_layout_add_free(layout, 512); tdb_layout_add_free(layout, 256); tdb = tdb_layout_get(layout); - zone_off = layout->elem[0].base.off; ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024); ok1(free_record_length(tdb, layout->elem[2].base.off) == 512); ok1(free_record_length(tdb, layout->elem[3].base.off) == 256); ok1(tdb_check(tdb, NULL, NULL) == 0); /* Figure out which bucket free entry is. */ - b_off = bucket_off(zone_off, size_to_bucket(zone_bits, 1024)); + b_off = bucket_off(tdb->flist_off, size_to_bucket(1024)); /* Lock and coalesce. */ ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0); - ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off, - b_off, 1024) == 1); + ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024) == 1); ok1(!tdb_has_locks(tdb)); ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024 + sizeof(struct tdb_used_record) + 512 @@ -151,28 +140,6 @@ int main(int argc, char *argv[]) ok1(tdb_check(tdb, NULL, NULL) == 0); tdb_close(tdb); - /* Coalescing across two zones isn't possible. */ - layout = new_tdb_layout(NULL); - tdb_layout_add_zone(layout, zone_bits, false); - tdb_layout_add_zone(layout, zone_bits, true); - tdb = tdb_layout_get(layout); - zone_off = layout->elem[0].base.off; - len = layout->elem[1].free.len; - ok1(free_record_length(tdb, layout->elem[1].base.off) == len); - ok1(tdb_check(tdb, NULL, NULL) == 0); - - /* Figure out which list free entry is. */ - b_off = bucket_off(zone_off, size_to_bucket(zone_bits, len)); - /* Lock and coalesce. */ - ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0); - ok1(coalesce(tdb, zone_off, zone_bits, layout->elem[1].base.off, - b_off, len) == 0); - tdb_unlock_free_bucket(tdb, b_off); - ok1(!tdb_has_locks(tdb)); - ok1(free_record_length(tdb, layout->elem[1].base.off) == len); - ok1(tdb_check(tdb, NULL, NULL) == 0); - tdb_close(tdb); - ok1(tap_log_messages == 0); return exit_status(); } diff --git a/ccan/tdb2/test/run-04-basichash.c b/ccan/tdb2/test/run-04-basichash.c index db895b6c..d5353398 100644 --- a/ccan/tdb2/test/run-04-basichash.c +++ b/ccan/tdb2/test/run-04-basichash.c @@ -169,7 +169,7 @@ int main(int argc, char *argv[]) /* Simple delete should work. */ ok1(delete_from_hash(tdb, &h) == 0); - ok1(add_free_record(tdb, rec_zone_bits(&rec), new_off, + ok1(add_free_record(tdb, new_off, sizeof(struct tdb_used_record) + rec_key_length(&rec) + rec_data_length(&rec) diff --git a/ccan/tdb2/test/run-30-exhaust-before-expand.c b/ccan/tdb2/test/run-30-exhaust-before-expand.c index eb656954..e2e27295 100644 --- a/ccan/tdb2/test/run-30-exhaust-before-expand.c +++ b/ccan/tdb2/test/run-30-exhaust-before-expand.c @@ -8,6 +8,23 @@ #include #include "logging.h" +static bool empty_freelist(struct tdb_context *tdb) +{ + struct tdb_freelist free; + unsigned int i; + + /* Now, free list should be completely exhausted in zone 0 */ + if (tdb_read_convert(tdb, tdb->flist_off, &free, sizeof(free)) != 0) + abort(); + + for (i = 0; i < sizeof(free.buckets)/sizeof(free.buckets[0]); i++) { + if (free.buckets[i]) + return false; + } + return true; +} + + int main(int argc, char *argv[]) { unsigned int i, j; @@ -16,12 +33,12 @@ int main(int argc, char *argv[]) TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT }; - plan_tests(sizeof(flags) / sizeof(flags[0]) * 5 + 1); + plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1); for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { - tdb_off_t free[BUCKETS_FOR_ZONE(INITIAL_ZONE_BITS) + 1]; - bool all_empty; - TDB_DATA k, d; + TDB_DATA k; + uint64_t size; + bool was_empty = false; k.dptr = (void *)&j; k.dsize = sizeof(j); @@ -32,38 +49,23 @@ int main(int argc, char *argv[]) if (!tdb) continue; - /* We don't want the hash to expand, so we use one alloc to - * chew up over most of the space first. */ - j = -1; - d.dsize = (1 << INITIAL_ZONE_BITS) - 500; - d.dptr = malloc(d.dsize); - ok1(tdb_store(tdb, k, d, TDB_INSERT) == 0); - ok1(tdb->map_size == sizeof(struct tdb_header) - + (1 << INITIAL_ZONE_BITS)); + ok1(empty_freelist(tdb)); + /* Create some free space. */ + ok1(tdb_expand(tdb, 1) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + ok1(!empty_freelist(tdb)); - /* Insert minimal-length records until we add a zone. */ - for (j = 0; - tdb->map_size == sizeof(struct tdb_header) - + (1 << INITIAL_ZONE_BITS); - j++) { + size = tdb->map_size; + /* Insert minimal-length records until we expand. */ + for (j = 0; tdb->map_size == size; j++) { + was_empty = empty_freelist(tdb); if (tdb_store(tdb, k, k, TDB_INSERT) != 0) err(1, "Failed to store record %i", j); } - /* Now, free list should be completely exhausted in zone 0 */ - ok1(tdb_read_convert(tdb, - sizeof(struct tdb_header) - + sizeof(struct free_zone_header), - &free, sizeof(free)) == 0); - - all_empty = true; - for (j = 0; j < sizeof(free)/sizeof(free[0]); j++) { - if (free[j]) { - diag("Free bucket %i not empty", j); - all_empty = false; - } - } - ok1(all_empty); + /* Would have been empty before expansion, but no longer. */ + ok1(was_empty); + ok1(!empty_freelist(tdb)); tdb_close(tdb); } diff --git a/ccan/tdb2/test/run-firstkey-nextkey.c b/ccan/tdb2/test/run-firstkey-nextkey.c index 222d1b53..db52c852 100644 --- a/ccan/tdb2/test/run-firstkey-nextkey.c +++ b/ccan/tdb2/test/run-firstkey-nextkey.c @@ -44,15 +44,21 @@ int main(int argc, char *argv[]) struct trav_data td; TDB_DATA k, k2; struct tdb_context *tdb; + union tdb_attribute seed_attr; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT }; + seed_attr.base.attr = TDB_ATTRIBUTE_SEED; + seed_attr.base.next = &tap_log_attr; + seed_attr.seed.seed = 6334326220117065685ULL; + plan_tests(sizeof(flags) / sizeof(flags[0]) * (NUM_RECORDS*4 + (NUM_RECORDS-1)*2 + 20) + 1); for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { tdb = tdb_open("run-traverse.tdb", flags[i], - O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + O_RDWR|O_CREAT|O_TRUNC, 0600, &seed_attr); ok1(tdb); if (!tdb) continue; diff --git a/ccan/tdb2/test/run-summary.c b/ccan/tdb2/test/run-summary.c index 006b87b6..450f0905 100644 --- a/ccan/tdb2/test/run-summary.c +++ b/ccan/tdb2/test/run-summary.c @@ -19,7 +19,7 @@ int main(int argc, char *argv[]) struct tdb_data data = { (unsigned char *)&j, sizeof(j) }; char *summary; - plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 2 * 17) + 1); + plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 2 * 4) + 1); for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { tdb = tdb_open("run-summary.tdb", flags[i], O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); @@ -42,19 +42,6 @@ int main(int argc, char *argv[]) ok1(strstr(summary, "Number of records: 500\n")); ok1(strstr(summary, "Smallest/average/largest keys: 4/4/4\n")); ok1(strstr(summary, "Smallest/average/largest data: 0/2/4\n")); - ok1(strstr(summary, "Free bucket 16:")); - ok1(strstr(summary, "Free bucket 24:")); - ok1(strstr(summary, "Free bucket 32:")); - ok1(strstr(summary, "Free bucket 40:")); - ok1(strstr(summary, "Free bucket 48:")); - ok1(strstr(summary, "Free bucket 56:")); - ok1(strstr(summary, "Free bucket 64:")); - ok1(strstr(summary, "Free bucket 72:")); - ok1(strstr(summary, "Free bucket 80:")); - ok1(strstr(summary, "Free bucket 88-136:")); - ok1(strstr(summary, "Free bucket 144-264:")); - ok1(strstr(summary, "Free bucket 272-520:")); - ok1(strstr(summary, "Free bucket 528-1032:")); if (j == TDB_SUMMARY_HISTOGRAMS) ok1(strstr(summary, "|") && strstr(summary, "*"));