X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Ftdb2%2Fhash.c;h=51874918c62ef4fb53e5581c252e8ace6301de9a;hp=0cc93760d1613fa44c4a2681133f27aeadb50c19;hb=c5e3f07a30056cb7c3c380bf690f80815ca4b6b1;hpb=6804501c350181dea8f531142b28c620b70edbd9

diff --git a/ccan/tdb2/hash.c b/ccan/tdb2/hash.c
index 0cc93760..51874918 100644
--- a/ccan/tdb2/hash.c
+++ b/ccan/tdb2/hash.c
@@ -129,16 +129,24 @@ bool is_subhash(tdb_off_t val)
 	return val >> (64-TDB_OFF_UPPER_STEAL) == (1<<[...]
 	h->home_bucket = use_bits(h, TDB_HASH_GROUP_BITS);
 
-	/* FIXME: Guess the depth, don't over-lock! */
-	h->hlock_start = (tdb_off_t)group
-		<< (64 - (TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS));
-	h->hlock_range = 1ULL << (64 - (TDB_TOPLEVEL_HASH_BITS
-					- TDB_HASH_GROUP_BITS));
+	h->hlock_start = hlock_range(group, &h->hlock_range);
 	if (tdb_lock_hashes(tdb, h->hlock_start, h->hlock_range, ltype,
 			    TDB_LOCK_WAIT))
 		return TDB_OFF_ERR;
 
 	hashtable = offsetof(struct tdb_header, hashtable);
+	if (tinfo) {
+		tinfo->toplevel_group = group;
+		tinfo->num_levels = 1;
+		tinfo->levels[0].entry = 0;
+		tinfo->levels[0].hashtable = hashtable
+			+ (group << TDB_HASH_GROUP_BITS) * sizeof(tdb_off_t);
+		tinfo->levels[0].total_buckets = 1 << TDB_HASH_GROUP_BITS;
+	}
 
 	while (likely(h->hash_used < 64)) {
 		/* Read in the hash group. */
@@ -172,9 +184,23 @@ tdb_off_t find_and_lock(struct tdb_context *tdb,
 		if (is_subhash(h->group[h->home_bucket])) {
 			hashtable = (h->group[h->home_bucket] & TDB_OFF_MASK)
 				+ sizeof(struct tdb_used_record);
+			if (tinfo) {
+				/* When we come back, use *next* bucket */
+				tinfo->levels[tinfo->num_levels-1].entry
+					+= h->home_bucket + 1;
+			}
 			group = use_bits(h, TDB_SUBLEVEL_HASH_BITS
					 - TDB_HASH_GROUP_BITS);
 			h->home_bucket = use_bits(h, TDB_HASH_GROUP_BITS);
+			if (tinfo) {
+				tinfo->levels[tinfo->num_levels].hashtable
+					= hashtable;
+				tinfo->levels[tinfo->num_levels].total_buckets
+					= 1 << TDB_SUBLEVEL_HASH_BITS;
+				tinfo->levels[tinfo->num_levels].entry
+					= group << TDB_HASH_GROUP_BITS;
+				tinfo->num_levels++;
+			}
 			continue;
 		}
 
@@ -189,8 +215,14 @@ tdb_off_t find_and_lock(struct tdb_context *tdb,
 			if (!h->group[h->found_bucket])
 				break;
 
-			if (match(tdb, h, &key, h->group[h->found_bucket], rec))
+			if (match(tdb, h, &key, h->group[h->found_bucket],
+				  rec)) {
+				if (tinfo) {
+					tinfo->levels[tinfo->num_levels-1].entry
+						+= h->found_bucket;
+				}
 				return h->group[h->found_bucket] & TDB_OFF_MASK;
+			}
 		}
 		/* Didn't find it: h indicates where it would go. */
 		return 0;
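The tinfo bookkeeping added above records the path taken down the hash tree, one level per sub-hash descended. For orientation, here is a sketch of the two structures as implied by the fields this patch uses; the authoritative definitions live in ccan/tdb2/private.h, and the comments and array bound below are assumptions, not the real header:

	#include <stdint.h>

	typedef uint64_t tdb_off_t;	/* as elsewhere in tdb2 */

	struct traverse_level {
		tdb_off_t hashtable;	    /* offset of this level's bucket array */
		unsigned int entry;	    /* next bucket index to visit */
		unsigned int total_buckets; /* 1 << TDB_HASH_GROUP_BITS at the top
					     * level, 1 << TDB_SUBLEVEL_HASH_BITS below */
	};

	struct traverse_info {
		unsigned int toplevel_group; /* group locked at the top level */
		unsigned int num_levels;     /* depth of the current path */
		tdb_off_t prev;		     /* last entry returned; skipping it
					      * keeps delete-during-traverse safe */
		struct traverse_level levels[64]; /* bound assumed: deep enough
						   * for any chain of sub-hashes */
	};

Note how find_and_lock() seeds levels[0] with the locked group's slice of the top-level table, while each descent pre-advances the parent level's entry ("use *next* bucket") so a resumed traverse skips the sub-hash it has already entered.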
@@ -450,74 +482,56 @@ int add_to_hash(struct tdb_context *tdb, struct hash_info *h, tdb_off_t new_off)
 	return add_to_hash(tdb, h, new_off);
 }
 
-/* No point holding references/copies of db once we drop lock. */
-static void release_entries(struct tdb_context *tdb,
-			    struct traverse_info *tinfo)
-{
-	unsigned int i;
-
-	for (i = 0; i < tinfo->num_levels; i++) {
-		if (tinfo->levels[i].entries) {
-			tdb_access_release(tdb, tinfo->levels[i].entries);
-			tinfo->levels[i].entries = NULL;
-		}
-	}
-}
-
 /* Traverse support: returns offset of record, or 0 or TDB_OFF_ERR. */
 static tdb_off_t iterate_hash(struct tdb_context *tdb,
 			      struct traverse_info *tinfo)
 {
-	tdb_off_t off;
+	tdb_off_t off, val;
 	unsigned int i;
 	struct traverse_level *tlevel;
 
 	tlevel = &tinfo->levels[tinfo->num_levels-1];
 again:
-	if (!tlevel->entries) {
-		tlevel->entries = tdb_access_read(tdb, tlevel->hashtable,
-						  sizeof(tdb_off_t)
-						  * tlevel->total_buckets,
-						  true);
-		if (!tlevel->entries)
+	for (i = tdb_find_nonzero_off(tdb, tlevel->hashtable,
+				      tlevel->entry, tlevel->total_buckets);
+	     i != tlevel->total_buckets;
+	     i = tdb_find_nonzero_off(tdb, tlevel->hashtable,
+				      i+1, tlevel->total_buckets)) {
+		val = tdb_read_off(tdb, tlevel->hashtable+sizeof(tdb_off_t)*i);
+		if (unlikely(val == TDB_OFF_ERR))
 			return TDB_OFF_ERR;
-	}
 
-	/* FIXME: Use tdb_find_nonzero_off? */
-	for (i = tlevel->entry; i < tlevel->total_buckets; i++) {
-		if (!tlevel->entries[i] || tlevel->entries[i] == tinfo->prev)
+		off = val & TDB_OFF_MASK;
+
+		/* This makes the delete-all-in-traverse case work
+		 * (and simplifies our logic a little). */
+		if (off == tinfo->prev)
 			continue;
 
 		tlevel->entry = i;
-		off = tlevel->entries[i] & TDB_OFF_MASK;
 
-		if (!is_subhash(tlevel->entries[i])) {
+		if (!is_subhash(val)) {
 			/* Found one. */
-			tinfo->prev = tlevel->entries[i];
-			release_entries(tdb, tinfo);
+			tinfo->prev = off;
 			return off;
 		}
 
-		/* When we come back, we want tne next one */
+		/* When we come back, we want the next one */
 		tlevel->entry++;
 		tinfo->num_levels++;
 		tlevel++;
 		tlevel->hashtable = off + sizeof(struct tdb_used_record);
 		tlevel->entry = 0;
-		tlevel->entries = NULL;
 		tlevel->total_buckets = (1 << TDB_SUBLEVEL_HASH_BITS);
 		goto again;
 	}
 
 	/* Nothing there? */
-	if (tinfo->num_levels == 1) {
-		release_entries(tdb, tinfo);
+	if (tinfo->num_levels == 1)
 		return 0;
-	}
 
 	/* Go back up and keep searching. */
-	tdb_access_release(tdb, tlevel->entries);
 	tinfo->num_levels--;
 	tlevel--;
 	goto again;
@@ -526,7 +540,7 @@ again:
 /* Return 1 if we find something, 0 if not, -1 on error. */
 int next_in_hash(struct tdb_context *tdb, int ltype,
 		 struct traverse_info *tinfo,
-		 TDB_DATA *kbuf, unsigned int *dlen)
+		 TDB_DATA *kbuf, size_t *dlen)
 {
 	const unsigned group_bits = TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS;
 	tdb_off_t hlock_start, hlock_range, off;
@@ -549,6 +563,14 @@ int next_in_hash(struct tdb_context *tdb, int ltype,
 					 ltype);
 			return -1;
 		}
+		if (rec_magic(&rec) != TDB_MAGIC) {
+			tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
+				 "next_in_hash:"
+				 " corrupt record at %llu\n",
+				 (long long)off);
+			return -1;
+		}
+
 		kbuf->dsize = rec_key_length(&rec);
 
 		/* They want data as well? */
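With iterate_hash() driving the cursor, callers see a simple 1/0/-1 protocol from next_in_hash() and first_in_hash() (the latter appears in the final hunk below: it just seeds tinfo and calls next_in_hash()). A hypothetical whole-database loop; F_RDLCK as the ltype and the lack of key-buffer cleanup are assumptions, and error handling is elided:

	/* Hypothetical caller: tdb is an open struct tdb_context *. */
	struct traverse_info tinfo;
	TDB_DATA k;
	size_t dlen;
	int ret;

	for (ret = first_in_hash(tdb, F_RDLCK, &tinfo, &k, &dlen);
	     ret == 1;
	     ret = next_in_hash(tdb, F_RDLCK, &tinfo, &k, &dlen)) {
		/* k.dptr/k.dsize hold the current key; dlen was filled in
		 * because we passed a non-NULL dlen pointer. */
	}
	if (ret == -1)
		; /* traversal failed part-way */

The rec_magic() check added above means a corrupt record now fails this loop with -1 instead of handing back a garbage key length.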
@@ -580,15 +602,57 @@ int next_in_hash(struct tdb_context *tdb, int ltype,
 /* Return 1 if we find something, 0 if not, -1 on error. */
 int first_in_hash(struct tdb_context *tdb, int ltype,
 		  struct traverse_info *tinfo,
-		  TDB_DATA *kbuf, unsigned int *dlen)
+		  TDB_DATA *kbuf, size_t *dlen)
 {
 	tinfo->prev = 0;
 	tinfo->toplevel_group = 0;
 	tinfo->num_levels = 1;
 	tinfo->levels[0].hashtable = offsetof(struct tdb_header, hashtable);
-	tinfo->levels[0].entries = NULL;
 	tinfo->levels[0].entry = 0;
 	tinfo->levels[0].total_buckets = (1 << TDB_HASH_GROUP_BITS);
 
 	return next_in_hash(tdb, ltype, tinfo, kbuf, dlen);
 }
+
+/* Even if the entry isn't in this hash bucket, you'd have to lock this
+ * bucket to find it. */
+static int chainlock(struct tdb_context *tdb, const TDB_DATA *key,
+		     int ltype, enum tdb_lock_flags waitflag,
+		     const char *func)
+{
+	int ret;
+	uint64_t h = tdb_hash(tdb, key->dptr, key->dsize);
+	tdb_off_t lockstart, locksize;
+	unsigned int group, gbits;
+
+	gbits = TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
+	group = bits(h, 64 - gbits, gbits);
+
+	lockstart = hlock_range(group, &locksize);
+
+	ret = tdb_lock_hashes(tdb, lockstart, locksize, ltype, waitflag);
+	tdb_trace_1rec(tdb, func, *key);
+	return ret;
+}
+
+/* lock/unlock one hash chain. This is meant to be used to reduce
+   contention - it cannot guarantee how many records will be locked */
+int tdb_chainlock(struct tdb_context *tdb, TDB_DATA key)
+{
+	return chainlock(tdb, &key, F_WRLCK, TDB_LOCK_WAIT, "tdb_chainlock");
+}
+
+int tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key)
+{
+	uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
+	tdb_off_t lockstart, locksize;
+	unsigned int group, gbits;
+
+	gbits = TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
+	group = bits(h, 64 - gbits, gbits);
+
+	lockstart = hlock_range(group, &locksize);
+
+	tdb_trace_1rec(tdb, "tdb_chainunlock", key);
+	return tdb_unlock_hashes(tdb, lockstart, locksize, F_WRLCK);
+}
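tdb_chainlock()/tdb_chainunlock() lock whatever hash range covers the key's top-level group, so unrelated keys hashing into the same range contend for the same lock; as the added comment says, it reduces contention but cannot promise how many records it covers. A hypothetical caller, assuming the usual 0-on-success convention inherited from tdb_lock_hashes():

	/* Hypothetical: serialise a read-modify-write on one key. */
	TDB_DATA key = { .dptr = (unsigned char *)"counter", .dsize = 7 };

	if (tdb_chainlock(tdb, key) == 0) {
		/* Fetch, modify and store `key` here: any other process
		 * calling tdb_chainlock() on a key in the same top-level
		 * group blocks until we unlock. */
		tdb_chainunlock(tdb, key);
	}

Re-hashing the key in tdb_chainunlock() rather than remembering the locked range keeps the API stateless, at the cost of recomputing tdb_hash() on unlock.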