X-Git-Url: https://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Ftdb2%2Fhash.c;h=17601c0c8bf3fc2bf3ed2496735c3dc725e8e7fd;hp=a8a701ec507d1ab13de7551a96244389c38742e3;hb=587982955ca9c61363d6e3004622ee97eda80e4b;hpb=4e185ad8ab5a7e01edbbe12d11eb2f1577de7e8b

diff --git a/ccan/tdb2/hash.c b/ccan/tdb2/hash.c
index a8a701ec..17601c0c 100644
--- a/ccan/tdb2/hash.c
+++ b/ccan/tdb2/hash.c
@@ -1,7 +1,7 @@
- /*
+ /*
   Trivial Database 2: hash handling
   Copyright (C) Rusty Russell 2010
-
+
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
@@ -64,7 +64,7 @@ uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off)
 }
 
 /* Get bits from a value. */
-static uint32_t bits(uint64_t val, unsigned start, unsigned num)
+static uint32_t bits_from(uint64_t val, unsigned start, unsigned num)
 {
 	assert(num <= 32);
 	return (val >> start) & ((1U << num) - 1);
@@ -75,7 +75,31 @@ static uint32_t bits(uint64_t val, unsigned start, unsigned num)
 static uint32_t use_bits(struct hash_info *h, unsigned num)
 {
 	h->hash_used += num;
-	return bits(h->h, 64 - h->hash_used, num);
+	return bits_from(h->h, 64 - h->hash_used, num);
+}
+
+static bool key_matches(struct tdb_context *tdb,
+			const struct tdb_used_record *rec,
+			tdb_off_t off,
+			const struct tdb_data *key)
+{
+	bool ret = false;
+	const char *rkey;
+
+	if (rec_key_length(rec) != key->dsize) {
+		add_stat(tdb, compare_wrong_keylen, 1);
+		return ret;
+	}
+
+	rkey = tdb_access_read(tdb, off + sizeof(*rec), key->dsize, false);
+	if (!rkey)
+		return ret;
+	if (memcmp(rkey, key->dptr, key->dsize) == 0)
+		ret = true;
+	else
+		add_stat(tdb, compare_wrong_keycmp, 1);
+	tdb_access_release(tdb, rkey);
+	return ret;
 }
 
 /* Does entry match? */
@@ -85,34 +109,33 @@ static bool match(struct tdb_context *tdb,
 		  tdb_off_t val,
 		  struct tdb_used_record *rec)
 {
-	bool ret;
-	const unsigned char *rkey;
 	tdb_off_t off;
 
+	add_stat(tdb, compares, 1);
 	/* Desired bucket must match. */
-	if (h->home_bucket != (val & TDB_OFF_HASH_GROUP_MASK))
+	if (h->home_bucket != (val & TDB_OFF_HASH_GROUP_MASK)) {
+		add_stat(tdb, compare_wrong_bucket, 1);
 		return false;
+	}
 
 	/* Top bits of offset == next bits of hash. */
-	if (bits(val, TDB_OFF_HASH_EXTRA_BIT, TDB_OFF_UPPER_STEAL_EXTRA)
-	    != bits(h->h, 64 - h->hash_used - TDB_OFF_UPPER_STEAL_EXTRA,
-		    TDB_OFF_UPPER_STEAL_EXTRA))
+	if (bits_from(val, TDB_OFF_HASH_EXTRA_BIT, TDB_OFF_UPPER_STEAL_EXTRA)
+	    != bits_from(h->h, 64 - h->hash_used - TDB_OFF_UPPER_STEAL_EXTRA,
+			 TDB_OFF_UPPER_STEAL_EXTRA)) {
+		add_stat(tdb, compare_wrong_offsetbits, 1);
 		return false;
+	}
 
 	off = val & TDB_OFF_MASK;
 	if (tdb_read_convert(tdb, off, rec, sizeof(*rec)) == -1)
 		return false;
 
-	/* FIXME: check extra bits in header? */
-	if (rec_key_length(rec) != key->dsize)
+	if ((h->h & ((1 << 11)-1)) != rec_hash(rec)) {
+		add_stat(tdb, compare_wrong_rechash, 1);
 		return false;
+	}
 
-	rkey = tdb_access_read(tdb, off + sizeof(*rec), key->dsize, false);
-	if (!rkey)
-		return false;
-	ret = (memcmp(rkey, key->dptr, key->dsize) == 0);
-	tdb_access_release(tdb, rkey);
-	return ret;
+	return key_matches(tdb, rec, off, key);
 }
 
 static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned bucket)
@@ -133,6 +156,65 @@ static tdb_off_t hlock_range(tdb_off_t group, tdb_off_t *size)
 	return group << (64 - (TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS));
 }
 
+static tdb_off_t COLD find_in_chain(struct tdb_context *tdb,
+				    struct tdb_data key,
+				    tdb_off_t chain,
+				    struct hash_info *h,
+				    struct tdb_used_record *rec,
+				    struct traverse_info *tinfo)
+{
+	tdb_off_t off, next;
+
+	/* In case nothing is free, we set these to zero. */
+	h->home_bucket = h->found_bucket = 0;
+
+	for (off = chain; off; off = next) {
+		unsigned int i;
+
+		h->group_start = off;
+		if (tdb_read_convert(tdb, off, h->group, sizeof(h->group)))
+			return TDB_OFF_ERR;
+
+		for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
+			tdb_off_t recoff;
+			if (!h->group[i]) {
+				/* Remember this empty bucket. */
+				h->home_bucket = h->found_bucket = i;
+				continue;
+			}
+
+			/* We can insert extra bits via add_to_hash
+			 * empty bucket logic. */
+			recoff = h->group[i] & TDB_OFF_MASK;
+			if (tdb_read_convert(tdb, recoff, rec, sizeof(*rec)))
+				return TDB_OFF_ERR;
+
+			if (key_matches(tdb, rec, recoff, &key)) {
+				h->home_bucket = h->found_bucket = i;
+
+				if (tinfo) {
+					tinfo->levels[tinfo->num_levels]
+						.hashtable = off;
+					tinfo->levels[tinfo->num_levels]
+						.total_buckets
+						= 1 << TDB_HASH_GROUP_BITS;
+					tinfo->levels[tinfo->num_levels].entry
+						= i;
+					tinfo->num_levels++;
+				}
+				return recoff;
+			}
+		}
+		next = tdb_read_off(tdb, off
+				    + offsetof(struct tdb_chain, next));
+		if (next == TDB_OFF_ERR)
+			return TDB_OFF_ERR;
+		if (next)
+			next += sizeof(struct tdb_used_record);
+	}
+	return 0;
+}
+
 /* This is the core routine which searches the hashtable for an entry.
  * On error, no locks are held and TDB_OFF_ERR is returned.
  * Otherwise, hinfo is filled in (and the optional tinfo).
@@ -163,12 +245,12 @@ tdb_off_t find_and_lock(struct tdb_context *tdb,
 		tinfo->toplevel_group = group;
 		tinfo->num_levels = 1;
 		tinfo->levels[0].entry = 0;
-		tinfo->levels[0].hashtable = hashtable
+		tinfo->levels[0].hashtable = hashtable
 			+ (group << TDB_HASH_GROUP_BITS) * sizeof(tdb_off_t);
 		tinfo->levels[0].total_buckets = 1 << TDB_HASH_GROUP_BITS;
 	}
 
-	while (likely(h->hash_used < 64)) {
+	while (h->hash_used <= 64) {
 		/* Read in the hash group. */
 		h->group_start = hashtable
 			+ group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
@@ -225,8 +307,7 @@ tdb_off_t find_and_lock(struct tdb_context *tdb,
 		return 0;
 	}
 
-	/* FIXME: We hit the bottom. Chain! */
-	abort();
+	return find_in_chain(tdb, key, hashtable, h, rec, tinfo);
 
 fail:
 	tdb_unlock_hashes(tdb, h->hlock_start, h->hlock_range, ltype);
@@ -297,13 +378,13 @@ static tdb_off_t encode_offset(tdb_off_t new_off, struct hash_info *h)
 {
 	return h->home_bucket
 		| new_off
-		| ((uint64_t)bits(h->h,
+		| ((uint64_t)bits_from(h->h,
 				  64 - h->hash_used - TDB_OFF_UPPER_STEAL_EXTRA,
 				  TDB_OFF_UPPER_STEAL_EXTRA)
 		   << TDB_OFF_HASH_EXTRA_BIT);
 }
 
-/* Simply overwrite the hash entry we found before. */
+/* Simply overwrite the hash entry we found before. */
 int replace_in_hash(struct tdb_context *tdb,
 		    struct hash_info *h,
 		    tdb_off_t new_off)
@@ -312,6 +393,41 @@ int replace_in_hash(struct tdb_context *tdb,
 			    encode_offset(new_off, h));
 }
 
+/* We slot in anywhere that's empty in the chain. */
+static int COLD add_to_chain(struct tdb_context *tdb,
+			     tdb_off_t subhash,
+			     tdb_off_t new_off)
+{
+	size_t entry = tdb_find_zero_off(tdb, subhash, 1<<TDB_HASH_GROUP_BITS);
+
+	if (entry == 1 << TDB_HASH_GROUP_BITS) {
+		tdb_off_t next;
+
+		next = tdb_read_off(tdb, subhash
+				    + offsetof(struct tdb_chain, next));
+		if (next == TDB_OFF_ERR)
+			return -1;
+
+		if (!next) {
+			next = alloc(tdb, 0, sizeof(struct tdb_chain), 0,
+				     TDB_CHAIN_MAGIC, false);
+			if (next == TDB_OFF_ERR)
+				return -1;
+			if (zero_out(tdb, next + sizeof(struct tdb_used_record),
+				     sizeof(struct tdb_chain)))
+				return -1;
+			if (tdb_write_off(tdb, subhash
+					  + offsetof(struct tdb_chain, next),
+					  next) != 0)
+				return -1;
+		}
+		return add_to_chain(tdb, next, new_off);
+	}
+
+	return tdb_write_off(tdb, subhash + entry * sizeof(tdb_off_t),
+			     new_off);
+}
+
 /* Add into a newly created subhash. */
 static int add_to_subhash(struct tdb_context *tdb, tdb_off_t subhash,
 			  unsigned hash_used, tdb_off_t val)
@@ -322,13 +438,12 @@ static int add_to_subhash(struct tdb_context *tdb, tdb_off_t subhash,
 
 	h.hash_used = hash_used;
 
-	/* FIXME: Chain! */
 	if (hash_used + TDB_SUBLEVEL_HASH_BITS > 64)
-		abort();
+		return add_to_chain(tdb, subhash, off);
 
 	h.h = hash_record(tdb, off);
 	gnum = use_bits(&h, TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS);
-	h.group_start = subhash + sizeof(struct tdb_used_record)
+	h.group_start = subhash
 		+ gnum * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
 	h.home_bucket = use_bits(&h, TDB_HASH_GROUP_BITS);
 
@@ -342,21 +457,29 @@ static int add_to_subhash(struct tdb_context *tdb, tdb_off_t subhash,
 
 static int expand_group(struct tdb_context *tdb, struct hash_info *h)
 {
-	unsigned bucket, num_vals, i;
+	unsigned bucket, num_vals, i, magic;
+	size_t subsize;
 	tdb_off_t subhash;
 	tdb_off_t vals[1 << TDB_HASH_GROUP_BITS];
 
 	/* Attach new empty subhash under fullest bucket. */
 	bucket = fullest_bucket(tdb, h->group, h->home_bucket);
 
-	subhash = alloc(tdb, 0, sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS,
-			0, false);
+	if (h->hash_used == 64) {
+		add_stat(tdb, alloc_chain, 1);
+		subsize = sizeof(struct tdb_chain);
+		magic = TDB_CHAIN_MAGIC;
+	} else {
+		add_stat(tdb, alloc_subhash, 1);
+		subsize = (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS);
+		magic = TDB_HTABLE_MAGIC;
+	}
+
+	subhash = alloc(tdb, 0, subsize, 0, magic, false);
 	if (subhash == TDB_OFF_ERR)
 		return -1;
 
-	add_stat(tdb, alloc_subhash, 1);
-	if (zero_out(tdb, subhash + sizeof(struct tdb_used_record),
-		     sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) == -1)
+	if (zero_out(tdb, subhash + sizeof(struct tdb_used_record), subsize))
 		return -1;
 
 	/* Remove any which are destined for bucket or are in wrong place. */
@@ -376,6 +499,9 @@ static int expand_group(struct tdb_context *tdb, struct hash_info *h)
 	/* Overwrite expanded bucket with subhash pointer. */
 	h->group[bucket] = subhash | (1ULL << TDB_OFF_UPPER_STEAL_SUBHASH_BIT);
 
+	/* Point to actual contents of record. */
+	subhash += sizeof(struct tdb_used_record);
+
 	/* Put values back. */
 	for (i = 0; i < num_vals; i++) {
 		unsigned this_bucket = vals[i] & TDB_OFF_HASH_GROUP_MASK;
@@ -430,10 +556,6 @@ int delete_from_hash(struct tdb_context *tdb, struct hash_info *h)
 
 int add_to_hash(struct tdb_context *tdb, struct hash_info *h, tdb_off_t new_off)
 {
-	/* FIXME: chain! */
-	if (h->hash_used >= 64)
-		abort();
-
 	/* We hit an empty bucket during search? That's where it goes. */
 	if (!h->group[h->found_bucket]) {
 		h->group[h->found_bucket] = encode_offset(new_off, h);
@@ -442,6 +564,9 @@ int add_to_hash(struct tdb_context *tdb, struct hash_info *h, tdb_off_t new_off)
 				      h->group, sizeof(h->group));
 	}
 
+	if (h->hash_used > 64)
+		return add_to_chain(tdb, h->group_start, new_off);
+
 	/* We're full. Expand. */
 	if (expand_group(tdb, h) == -1)
 		return -1;
@@ -520,7 +645,11 @@ again:
 		tlevel++;
 		tlevel->hashtable = off + sizeof(struct tdb_used_record);
 		tlevel->entry = 0;
-		tlevel->total_buckets = (1 << TDB_SUBLEVEL_HASH_BITS);
+		/* Next level is a chain? */
+		if (unlikely(tinfo->num_levels == TDB_MAX_LEVELS + 1))
+			tlevel->total_buckets = (1 << TDB_HASH_GROUP_BITS);
+		else
+			tlevel->total_buckets = (1 << TDB_SUBLEVEL_HASH_BITS);
 		goto again;
 	}
 
@@ -528,6 +657,20 @@ again:
 	if (tinfo->num_levels == 1)
 		return 0;
 
+	/* Handle chained entries. */
+	if (unlikely(tinfo->num_levels == TDB_MAX_LEVELS + 1)) {
+		tlevel->hashtable = tdb_read_off(tdb, tlevel->hashtable
+						 + offsetof(struct tdb_chain,
+							    next));
+		if (tlevel->hashtable == TDB_OFF_ERR)
+			return TDB_OFF_ERR;
+		if (tlevel->hashtable) {
+			tlevel->hashtable += sizeof(struct tdb_used_record);
+			tlevel->entry = 0;
+			goto again;
+		}
+	}
+
 	/* Go back up and keep searching. */
 	tinfo->num_levels--;
 	tlevel--;
@@ -540,13 +683,13 @@ int next_in_hash(struct tdb_context *tdb, int ltype,
 		 TDB_DATA *kbuf, size_t *dlen)
 {
 	const unsigned group_bits = TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS;
-	tdb_off_t hlock_start, hlock_range, off;
+	tdb_off_t hl_start, hl_range, off;
 
 	while (tinfo->toplevel_group < (1 << group_bits)) {
-		hlock_start = (tdb_off_t)tinfo->toplevel_group
+		hl_start = (tdb_off_t)tinfo->toplevel_group
 			<< (64 - group_bits);
-		hlock_range = 1ULL << group_bits;
-		if (tdb_lock_hashes(tdb, hlock_start, hlock_range, ltype,
+		hl_range = 1ULL << group_bits;
+		if (tdb_lock_hashes(tdb, hl_start, hl_range, ltype,
 				    TDB_LOCK_WAIT) != 0)
 			return -1;
 
@@ -556,13 +699,12 @@ int next_in_hash(struct tdb_context *tdb, int ltype,
 
 			if (tdb_read_convert(tdb, off, &rec, sizeof(rec))) {
 				tdb_unlock_hashes(tdb,
-						  hlock_start, hlock_range,
-						  ltype);
+						  hl_start, hl_range, ltype);
 				return -1;
 			}
-			if (rec_magic(&rec) != TDB_MAGIC) {
+			if (rec_magic(&rec) != TDB_USED_MAGIC) {
 				tdb_logerr(tdb, TDB_ERR_CORRUPT,
-					   TDB_DEBUG_FATAL,
+					   TDB_LOG_ERROR,
 					   "next_in_hash:"
 					   " corrupt record at %llu",
 					   (long long)off);
@@ -574,20 +716,20 @@ int next_in_hash(struct tdb_context *tdb, int ltype,
 
 			/* They want data as well? */
 			if (dlen) {
 				*dlen = rec_data_length(&rec);
-				kbuf->dptr = tdb_alloc_read(tdb,
+				kbuf->dptr = tdb_alloc_read(tdb,
 							    off + sizeof(rec),
 							    kbuf->dsize
 							    + *dlen);
 			} else {
-				kbuf->dptr = tdb_alloc_read(tdb,
+				kbuf->dptr = tdb_alloc_read(tdb,
 							    off + sizeof(rec),
 							    kbuf->dsize);
 			}
-			tdb_unlock_hashes(tdb, hlock_start, hlock_range, ltype);
+			tdb_unlock_hashes(tdb, hl_start, hl_range, ltype);
 			return kbuf->dptr ? 1 : -1;
 		}
 
-		tdb_unlock_hashes(tdb, hlock_start, hlock_range, ltype);
+		tdb_unlock_hashes(tdb, hl_start, hl_range, ltype);
 		tinfo->toplevel_group++;
 		tinfo->levels[0].hashtable
@@ -624,7 +766,7 @@ static int chainlock(struct tdb_context *tdb, const TDB_DATA *key,
 	unsigned int group, gbits;
 
 	gbits = TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
-	group = bits(h, 64 - gbits, gbits);
+	group = bits_from(h, 64 - gbits, gbits);
 
 	lockstart = hlock_range(group, &locksize);
 
@@ -647,7 +789,7 @@ int tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key)
 	unsigned int group, gbits;
 
 	gbits = TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
-	group = bits(h, 64 - gbits, gbits);
+	group = bits_from(h, 64 - gbits, gbits);
 
 	lockstart = hlock_range(group, &locksize);
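
The patch does two things: it renames bits() to bits_from(), and it replaces the old abort() stubs with real handling for the case where all 64 hash bits have been consumed. The following standalone sketch (illustration only, not code from the patch) shows the top-down bit-consumption scheme that bits_from()/use_bits() implement; the level widths 10, 3 and 8 are assumed stand-ins for TDB_TOPLEVEL_HASH_BITS, TDB_HASH_GROUP_BITS and TDB_SUBLEVEL_HASH_BITS, whose real values are defined elsewhere in tdb2:

/* Illustration only -- shows how the 64-bit hash is consumed from the
 * top down, a level at a time, until too few bits remain. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same contract as the patch's bits_from(): extract 'num' bits of
 * 'val' starting at bit 'start'. */
static uint32_t bits_from(uint64_t val, unsigned start, unsigned num)
{
	assert(num <= 32);
	return (val >> start) & ((1U << num) - 1);
}

struct toy_hash {
	uint64_t h;		/* the full 64-bit hash of the key */
	unsigned hash_used;	/* bits already spent, from the top */
};

/* Take the next 'num' bits off the top, like use_bits() in the patch. */
static uint32_t use_bits(struct toy_hash *h, unsigned num)
{
	h->hash_used += num;
	return bits_from(h->h, 64 - h->hash_used, num);
}

int main(void)
{
	struct toy_hash h = { 0xdeadbeefcafef00dULL, 0 };
	uint32_t group, bucket;
	unsigned levels = 0;

	/* Top level: a group number, then a home bucket within it. */
	group = use_bits(&h, 10 - 3);
	bucket = use_bits(&h, 3);
	printf("toplevel: group=%u bucket=%u used=%u\n",
	       group, bucket, h.hash_used);

	/* Each sublevel spends 8 more bits until fewer than 8 remain. */
	while (h.hash_used + 8 <= 64) {
		use_bits(&h, 8 - 3);	/* sublevel group number */
		use_bits(&h, 3);	/* bucket within the group */
		levels++;
	}
	printf("%u sublevels, %u bits used: fall back to chaining\n",
	       levels, h.hash_used);
	return 0;
}

Once the budget is exhausted, find_and_lock() can no longer descend into another subhash, which is exactly where the new code falls through to find_in_chain() instead of the old abort().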
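When no hash bits are left, two colliding keys can only be told apart by comparing the keys themselves, so the patch stores them in a linked chain of bucket groups (struct tdb_chain) probed via key_matches(). Below is a minimal in-memory analogue of add_to_chain()/find_in_chain(); the names chain_add()/chain_find() and the GROUP_SLOTS value are invented for this sketch, whereas real tdb2 walks file offsets with tdb_read_off()/tdb_write_off() and extends the chain with alloc(..., TDB_CHAIN_MAGIC, ...):

/* Illustration only -- a heap-based stand-in for the on-disk chain. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define GROUP_SLOTS 8	/* stands in for 1 << TDB_HASH_GROUP_BITS */

struct chain {
	const char *slot[GROUP_SLOTS];	/* NULL means the slot is empty */
	struct chain *next;		/* next link, NULL at the end */
};

/* Insert into the first empty slot, extending the chain only when
 * every slot of every existing link is taken, as add_to_chain() does. */
static void chain_add(struct chain *c, const char *key)
{
	for (;;) {
		unsigned i;
		for (i = 0; i < GROUP_SLOTS; i++) {
			if (!c->slot[i]) {
				c->slot[i] = key;
				return;
			}
		}
		if (!c->next) {
			c->next = calloc(1, sizeof(*c->next));
			if (!c->next)
				abort();	/* toy: no error path */
		}
		c = c->next;
	}
}

/* No hash bits remain to discriminate, so every occupied slot is
 * checked by full key comparison, as key_matches() does in the patch. */
static const char *chain_find(struct chain *c, const char *key)
{
	for (; c; c = c->next) {
		unsigned i;
		for (i = 0; i < GROUP_SLOTS; i++)
			if (c->slot[i] && strcmp(c->slot[i], key) == 0)
				return c->slot[i];
	}
	return NULL;
}

int main(void)
{
	struct chain head = { { NULL }, NULL };
	static char keys[20][16];
	int i;

	for (i = 0; i < 20; i++) {	/* 20 keys force extra links */
		snprintf(keys[i], sizeof(keys[i]), "key%d", i);
		chain_add(&head, keys[i]);
	}
	printf("key17 %s\n", chain_find(&head, "key17") ? "found" : "missing");
	return 0;
}

As in the patch, insertion reuses the first empty slot rather than appending, so slots freed by deletion are recycled; lookups degrade to a linear scan of the chain, which is acceptable because chains only ever hold keys whose hashes collide in all 64 bits.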