From: Rusty Russell Date: Tue, 14 Sep 2010 10:36:17 +0000 (+0930) Subject: tdb2: implement tdb_firstkey/tdb_nextkey. X-Git-Url: https://git.ozlabs.org/?p=ccan;a=commitdiff_plain;h=bee60dd0ffe0b1b5821061b4384000c4246f2051 tdb2: implement tdb_firstkey/tdb_nextkey. --- diff --git a/ccan/tdb2/hash.c b/ccan/tdb2/hash.c index 71ede4db..0fd47749 100644 --- a/ccan/tdb2/hash.c +++ b/ccan/tdb2/hash.c @@ -131,14 +131,15 @@ bool is_subhash(tdb_off_t val) /* This is the core routine which searches the hashtable for an entry. * On error, no locks are held and TDB_OFF_ERR is returned. - * Otherwise, hinfo is filled in. + * Otherwise, hinfo is filled in (and the optional tinfo). * If not found, the return value is 0. * If found, the return value is the offset, and *rec is the record. */ tdb_off_t find_and_lock(struct tdb_context *tdb, struct tdb_data key, int ltype, struct hash_info *h, - struct tdb_used_record *rec) + struct tdb_used_record *rec, + struct traverse_info *tinfo) { uint32_t i, group; tdb_off_t hashtable; @@ -158,6 +159,14 @@ tdb_off_t find_and_lock(struct tdb_context *tdb, return TDB_OFF_ERR; hashtable = offsetof(struct tdb_header, hashtable); + if (tinfo) { + tinfo->toplevel_group = group; + tinfo->num_levels = 1; + tinfo->levels[0].entry = 0; + tinfo->levels[0].hashtable = hashtable + + (group << TDB_HASH_GROUP_BITS) * sizeof(tdb_off_t); + tinfo->levels[0].total_buckets = 1 << TDB_HASH_GROUP_BITS; + } while (likely(h->hash_used < 64)) { /* Read in the hash group. */ @@ -172,9 +181,23 @@ tdb_off_t find_and_lock(struct tdb_context *tdb, if (is_subhash(h->group[h->home_bucket])) { hashtable = (h->group[h->home_bucket] & TDB_OFF_MASK) + sizeof(struct tdb_used_record); + if (tinfo) { + /* When we come back, use *next* bucket */ + tinfo->levels[tinfo->num_levels-1].entry + += h->home_bucket + 1; + } group = use_bits(h, TDB_SUBLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS); h->home_bucket = use_bits(h, TDB_HASH_GROUP_BITS); + if (tinfo) { + tinfo->levels[tinfo->num_levels].hashtable + = hashtable; + tinfo->levels[tinfo->num_levels].total_buckets + = 1 << TDB_SUBLEVEL_HASH_BITS; + tinfo->levels[tinfo->num_levels].entry + = group << TDB_HASH_GROUP_BITS; + tinfo->num_levels++; + } continue; } @@ -189,8 +212,14 @@ tdb_off_t find_and_lock(struct tdb_context *tdb, if (!h->group[h->found_bucket]) break; - if (match(tdb, h, &key, h->group[h->found_bucket], rec)) + if (match(tdb, h, &key, h->group[h->found_bucket], + rec)) { + if (tinfo) { + tinfo->levels[tinfo->num_levels-1].entry + += h->found_bucket; + } return h->group[h->found_bucket] & TDB_OFF_MASK; + } } /* Didn't find it: h indicates where it would go. */ return 0; @@ -470,17 +499,18 @@ again: if (unlikely(val == TDB_OFF_ERR)) return TDB_OFF_ERR; + off = val & TDB_OFF_MASK; + /* This makes the delete-all-in-traverse case work * (and simplifies our logic a little). */ - if (val == tinfo->prev) + if (off == tinfo->prev) continue; tlevel->entry = i; - off = val & TDB_OFF_MASK; if (!is_subhash(val)) { /* Found one. */ - tinfo->prev = val; + tinfo->prev = off; return off; } @@ -572,3 +602,41 @@ int first_in_hash(struct tdb_context *tdb, int ltype, return next_in_hash(tdb, ltype, tinfo, kbuf, dlen); } + +TDB_DATA tdb_firstkey(struct tdb_context *tdb) +{ + struct traverse_info tinfo; + struct tdb_data k; + switch (first_in_hash(tdb, F_RDLCK, &tinfo, &k, NULL)) { + case 1: + return k; + case 0: + tdb->ecode = TDB_SUCCESS; + /* Fall thru... */ + default: + return tdb_null; + } +} + +/* We lock twice, not very efficient. We could keep last key & tinfo cached. */ +TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA key) +{ + struct traverse_info tinfo; + struct hash_info h; + struct tdb_used_record rec; + + tinfo.prev = find_and_lock(tdb, key, F_RDLCK, &h, &rec, &tinfo); + if (unlikely(tinfo.prev == TDB_OFF_ERR)) + return tdb_null; + tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK); + + switch (next_in_hash(tdb, F_RDLCK, &tinfo, &key, NULL)) { + case 1: + return key; + case 0: + tdb->ecode = TDB_SUCCESS; + /* Fall thru... */ + default: + return tdb_null; + } +} diff --git a/ccan/tdb2/private.h b/ccan/tdb2/private.h index 9b225521..19d7866d 100644 --- a/ccan/tdb2/private.h +++ b/ccan/tdb2/private.h @@ -349,7 +349,8 @@ tdb_off_t find_and_lock(struct tdb_context *tdb, struct tdb_data key, int ltype, struct hash_info *h, - struct tdb_used_record *rec); + struct tdb_used_record *rec, + struct traverse_info *tinfo); int replace_in_hash(struct tdb_context *tdb, struct hash_info *h, diff --git a/ccan/tdb2/tdb.c b/ccan/tdb2/tdb.c index 659d39b2..9468e604 100644 --- a/ccan/tdb2/tdb.c +++ b/ccan/tdb2/tdb.c @@ -432,7 +432,7 @@ int tdb_store(struct tdb_context *tdb, struct tdb_used_record rec; int ret; - off = find_and_lock(tdb, key, F_WRLCK, &h, &rec); + off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL); if (unlikely(off == TDB_OFF_ERR)) return -1; @@ -494,7 +494,7 @@ int tdb_append(struct tdb_context *tdb, struct tdb_data new_dbuf; int ret; - off = find_and_lock(tdb, key, F_WRLCK, &h, &rec); + off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL); if (unlikely(off == TDB_OFF_ERR)) return -1; @@ -562,7 +562,7 @@ struct tdb_data tdb_fetch(struct tdb_context *tdb, struct tdb_data key) struct hash_info h; struct tdb_data ret; - off = find_and_lock(tdb, key, F_RDLCK, &h, &rec); + off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL); if (unlikely(off == TDB_OFF_ERR)) return tdb_null; @@ -585,7 +585,7 @@ int tdb_delete(struct tdb_context *tdb, struct tdb_data key) struct tdb_used_record rec; struct hash_info h; - off = find_and_lock(tdb, key, F_WRLCK, &h, &rec); + off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL); if (unlikely(off == TDB_OFF_ERR)) return -1; diff --git a/ccan/tdb2/test/run-04-basichash.c b/ccan/tdb2/test/run-04-basichash.c index ba70cc37..db895b6c 100644 --- a/ccan/tdb2/test/run-04-basichash.c +++ b/ccan/tdb2/test/run-04-basichash.c @@ -45,7 +45,7 @@ int main(int argc, char *argv[]) v = 0; /* Should not find it. */ - ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec) == 0); + ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0); /* Should have created correct hash. */ ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); /* Should have located space in group 0, bucket 0. */ @@ -84,7 +84,8 @@ int main(int argc, char *argv[]) ok1(tdb_check(tdb, NULL, NULL) == 0); /* Now, this should give a successful lookup. */ - ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec) == new_off); + ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) + == new_off); /* Should have created correct hash. */ ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); /* Should have located space in group 0, bucket 0. */ @@ -110,7 +111,7 @@ int main(int argc, char *argv[]) /* Test expansion. */ v = 1; - ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec) == 0); + ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0); /* Should have created correct hash. */ ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); /* Should have located space in group 0, bucket 1. */ @@ -146,7 +147,8 @@ int main(int argc, char *argv[]) /* Should be able to find it. */ v = 0; - ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec) == new_off); + ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) + == new_off); /* Should have created correct hash. */ ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); /* Should have located space in expanded group 0, bucket 0. */ @@ -178,7 +180,7 @@ int main(int argc, char *argv[]) /* Test second-level expansion: should expand 0th bucket. */ v = 0; - ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec) == 0); + ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0); /* Should have created correct hash. */ ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); /* Should have located space in group 0, bucket 0. */ @@ -210,7 +212,7 @@ int main(int argc, char *argv[]) /* Should be happy with expansion. */ ok1(tdb_check(tdb, NULL, NULL) == 0); - ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec) == 0); + ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0); /* Should have created correct hash. */ ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); /* Should have located space in group 0, bucket 0. */ @@ -241,7 +243,8 @@ int main(int argc, char *argv[]) /* Should be able to find it. */ v = 0; - ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec) == new_off); + ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) + == new_off); /* Should have created correct hash. */ ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); /* Should have located space in expanded group 0, bucket 0. */ diff --git a/ccan/tdb2/test/run-20-growhash.c b/ccan/tdb2/test/run-20-growhash.c index 6bcfd7d3..adbe733e 100644 --- a/ccan/tdb2/test/run-20-growhash.c +++ b/ccan/tdb2/test/run-20-growhash.c @@ -76,7 +76,7 @@ int main(int argc, char *argv[]) /* Check first still exists. */ kdata = make_key(0, 0, 0, 0, 0, 0); - ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec) != 0); + ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL) != 0); /* Should have created correct hash. */ ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); /* Should have located space in group 0, bucket 0. */ @@ -98,7 +98,7 @@ int main(int argc, char *argv[]) ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0); ok1(tdb_check(tdb, NULL, NULL) == 0); - ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec) != 0); + ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL)); /* Should have created correct hash. */ ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); /* Should have moved to subhash */ @@ -122,7 +122,7 @@ int main(int argc, char *argv[]) ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0); ok1(tdb_check(tdb, NULL, NULL) == 0); - ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec) != 0); + ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL)); /* Should have created correct hash. */ ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); /* Should have moved to subhash */ diff --git a/ccan/tdb2/test/run-firstkey-nextkey.c b/ccan/tdb2/test/run-firstkey-nextkey.c new file mode 100644 index 00000000..222d1b53 --- /dev/null +++ b/ccan/tdb2/test/run-firstkey-nextkey.c @@ -0,0 +1,149 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "logging.h" + +#define NUM_RECORDS 1000 + +static bool store_records(struct tdb_context *tdb) +{ + int i; + struct tdb_data key = { (unsigned char *)&i, sizeof(i) }; + struct tdb_data data = { (unsigned char *)&i, sizeof(i) }; + + for (i = 0; i < NUM_RECORDS; i++) + if (tdb_store(tdb, key, data, TDB_REPLACE) != 0) + return false; + return true; +} + +struct trav_data { + unsigned int records[NUM_RECORDS]; + unsigned int calls; +}; + +static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *p) +{ + struct trav_data *td = p; + int val; + + memcpy(&val, dbuf.dptr, dbuf.dsize); + td->records[td->calls++] = val; + return 0; +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + int num; + struct trav_data td; + TDB_DATA k, k2; + struct tdb_context *tdb; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) + * (NUM_RECORDS*4 + (NUM_RECORDS-1)*2 + 20) + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-traverse.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(tdb); + if (!tdb) + continue; + + ok1(tdb_firstkey(tdb).dptr == NULL); + ok1(tdb_error(tdb) == TDB_SUCCESS); + + /* One entry... */ + k.dptr = (unsigned char *)# + k.dsize = sizeof(num); + num = 0; + ok1(tdb_store(tdb, k, k, TDB_INSERT) == 0); + k = tdb_firstkey(tdb); + ok1(k.dsize == sizeof(num)); + ok1(memcmp(k.dptr, &num, sizeof(num)) == 0); + k2 = tdb_nextkey(tdb, k); + ok1(k2.dsize == 0 && k2.dptr == NULL); + free(k.dptr); + + /* Two entries. */ + k.dptr = (unsigned char *)# + k.dsize = sizeof(num); + num = 1; + ok1(tdb_store(tdb, k, k, TDB_INSERT) == 0); + k = tdb_firstkey(tdb); + ok1(k.dsize == sizeof(num)); + memcpy(&num, k.dptr, sizeof(num)); + ok1(num == 0 || num == 1); + k2 = tdb_nextkey(tdb, k); + ok1(k2.dsize == sizeof(j)); + free(k.dptr); + memcpy(&j, k2.dptr, sizeof(j)); + ok1(j == 0 || j == 1); + ok1(j != num); + k = tdb_nextkey(tdb, k2); + ok1(k.dsize == 0 && k.dptr == NULL); + free(k2.dptr); + + /* Clean up. */ + k.dptr = (unsigned char *)# + k.dsize = sizeof(num); + num = 0; + ok1(tdb_delete(tdb, k) == 0); + num = 1; + ok1(tdb_delete(tdb, k) == 0); + + /* Now lots of records. */ + ok1(store_records(tdb)); + td.calls = 0; + + num = tdb_traverse_read(tdb, trav, &td); + ok1(num == NUM_RECORDS); + ok1(td.calls == NUM_RECORDS); + + /* Simple loop should match tdb_traverse_read */ + for (j = 0, k = tdb_firstkey(tdb); j < td.calls; j++) { + int val; + + ok1(k.dsize == sizeof(val)); + memcpy(&val, k.dptr, k.dsize); + ok1(td.records[j] == val); + k2 = tdb_nextkey(tdb, k); + free(k.dptr); + k = k2; + } + + /* But arbitary orderings should work too. */ + for (j = td.calls-1; j > 0; j--) { + k.dptr = (unsigned char *)&td.records[j-1]; + k.dsize = sizeof(td.records[j-1]); + k = tdb_nextkey(tdb, k); + ok1(k.dsize == sizeof(td.records[j])); + ok1(memcmp(k.dptr, &td.records[j], k.dsize) == 0); + free(k.dptr); + } + + /* Even delete should work. */ + for (j = 0, k = tdb_firstkey(tdb); k.dptr; j++) { + ok1(k.dsize == 4); + ok1(tdb_delete(tdb, k) == 0); + k2 = tdb_nextkey(tdb, k); + free(k.dptr); + k = k2; + } + + diag("delete using first/nextkey gave %u of %u records", + j, NUM_RECORDS); + ok1(j == NUM_RECORDS); + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +}