2 Trivial Database 2: free list/block handling
3 Copyright (C) Rusty Russell 2010
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 3 of the License, or (at your option) any later version.
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include <ccan/likely/likely.h>
20 #include <ccan/asearch/asearch.h>
22 /* We keep an ordered array of offsets. */
23 static bool append(tdb_off_t **arr, size_t *num, tdb_off_t off)
25 tdb_off_t *new = realloc(*arr, (*num + 1) * sizeof(tdb_off_t));
33 static bool check_header(struct tdb_context *tdb)
36 struct tdb_header hdr;
38 if (tdb_read_convert(tdb, 0, &hdr, sizeof(hdr)) == -1)
40 /* magic food should not be converted, so convert back. */
41 tdb_convert(tdb, hdr.magic_food, sizeof(hdr.magic_food));
43 hash_test = TDB_HASH_MAGIC;
44 hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test));
45 if (hdr.hash_test != hash_test) {
46 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
47 "check: hash test %llu should be %llu\n",
48 hdr.hash_test, hash_test);
52 if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) {
53 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
54 "check: bad magic '%.*s'\n",
55 sizeof(hdr.magic_food), hdr.magic_food);
59 /* Don't check reserved: they *can* be used later. */
63 static bool check_hash_tree(struct tdb_context *tdb,
64 tdb_off_t off, unsigned int group_bits,
66 unsigned hprefix_bits,
71 static bool check_hash_record(struct tdb_context *tdb,
74 unsigned hprefix_bits,
79 struct tdb_used_record rec;
81 if (tdb_read_convert(tdb, off, &rec, sizeof(rec)) == -1)
84 if (rec_data_length(&rec)
85 != sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) {
86 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
87 "tdb_check: Bad hash table length %llu vs %llu\n",
88 (long long)rec_data_length(&rec),
89 (long long)sizeof(tdb_off_t)<<TDB_SUBLEVEL_HASH_BITS);
92 if (rec_key_length(&rec) != 0) {
93 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
94 "tdb_check: Bad hash table key length %llu\n",
95 (long long)rec_key_length(&rec));
98 if (rec_hash(&rec) != 0) {
99 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
100 "tdb_check: Bad hash table hash value %llu\n",
101 (long long)rec_hash(&rec));
106 return check_hash_tree(tdb, off,
107 TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
108 hprefix, hprefix_bits,
109 used, num_used, num_found);
112 static int off_cmp(const tdb_off_t *a, const tdb_off_t *b)
114 /* Can overflow an int. */
120 static uint64_t get_bits(uint64_t h, unsigned num, unsigned *used)
124 return (h >> (64 - *used)) & ((1U << num) - 1);
127 static bool check_hash_tree(struct tdb_context *tdb,
128 tdb_off_t off, unsigned int group_bits,
130 unsigned hprefix_bits,
136 const tdb_off_t *hash;
137 struct tdb_used_record rec;
139 hash = tdb_access_read(tdb, off,
141 << (group_bits + TDB_HASH_GROUP_BITS),
146 for (g = 0; g < (1 << group_bits); g++) {
147 const tdb_off_t *group = hash + (g << TDB_HASH_GROUP_BITS);
148 for (b = 0; b < (1 << TDB_HASH_GROUP_BITS); b++) {
149 unsigned int bucket, i, used_bits;
155 off = group[b] & TDB_OFF_MASK;
156 p = asearch(&off, used, num_used, off_cmp);
158 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
159 "tdb_check: Invalid offset %llu "
164 /* Mark it invalid. */
168 if (is_subhash(group[b])) {
171 << (group_bits + TDB_HASH_GROUP_BITS))
172 + g * (1 << TDB_HASH_GROUP_BITS) + b;
174 if (!check_hash_record(tdb,
175 group[b] & TDB_OFF_MASK,
179 + TDB_HASH_GROUP_BITS,
180 used, num_used, num_found))
186 /* Does it belong here at all? */
187 h = hash_record(tdb, off);
189 if (get_bits(h, hprefix_bits, &used_bits) != hprefix
191 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
192 "check: bad hash placement"
193 " 0x%llx vs 0x%llx\n",
194 (long long)h, (long long)hprefix);
198 /* Does it belong in this group? */
199 if (get_bits(h, group_bits, &used_bits) != g) {
200 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
201 "check: bad group %llu vs %u\n",
206 /* Are bucket bits correct? */
207 bucket = group[b] & TDB_OFF_HASH_GROUP_MASK;
208 if (get_bits(h, TDB_HASH_GROUP_BITS, &used_bits)
210 used_bits -= TDB_HASH_GROUP_BITS;
211 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
212 "check: bad bucket %u vs %u\n",
213 (unsigned)get_bits(h,
220 /* There must not be any zero entries between
221 * the bucket it belongs in and this one! */
224 i = (i + 1) % (1 << TDB_HASH_GROUP_BITS)) {
226 tdb->log(tdb, TDB_DEBUG_ERROR,
228 "check: bad group placement"
235 if (tdb_read_convert(tdb, off, &rec, sizeof(rec)) == -1)
238 /* Bottom bits must match header. */
239 if ((h & ((1 << 5)-1)) != rec_hash(&rec)) {
240 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
241 "tdb_check: Bad hash magic at"
242 " offset %llu (0x%llx vs 0x%llx)\n",
245 (long long)rec_hash(&rec));
250 tdb_access_release(tdb, hash);
254 tdb_access_release(tdb, hash);
258 static bool check_hash(struct tdb_context *tdb,
262 size_t num_found = 0;
264 if (!check_hash_tree(tdb, offsetof(struct tdb_header, hashtable),
265 TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
266 0, 0, used, num_used, &num_found))
269 if (num_found != num_used) {
270 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
271 "tdb_check: Not all entries are in hash\n");
277 static bool check_free(struct tdb_context *tdb,
279 const struct tdb_free_record *frec,
281 tdb_off_t zone_off, unsigned int bucket)
283 if (frec_magic(frec) != TDB_FREE_MAGIC) {
284 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
285 "tdb_check: offset %llu bad magic 0x%llx\n",
286 (long long)off, (long long)frec->magic_and_meta);
289 if (tdb->methods->oob(tdb, off
290 + frec->data_len+sizeof(struct tdb_used_record),
293 if (off < zone_off || off >= zone_off + (1ULL<<frec_zone_bits(frec))) {
294 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
295 "tdb_check: offset %llu outside zone %llu-%llu\n",
298 (long long)zone_off + (1ULL<<frec_zone_bits(frec)));
301 if (size_to_bucket(frec_zone_bits(frec), frec->data_len) != bucket) {
302 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
303 "tdb_check: offset %llu in wrong bucket %u vs %u\n",
306 size_to_bucket(frec_zone_bits(frec), frec->data_len));
309 if (prev != frec->prev) {
310 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
311 "tdb_check: offset %llu bad prev %llu vs %llu\n",
313 (long long)prev, (long long)frec->prev);
319 static tdb_len_t check_free_list(struct tdb_context *tdb,
325 struct free_zone_header zhdr;
329 if (tdb_read_convert(tdb, zone_off, &zhdr, sizeof(zhdr)) == -1)
332 for (i = 0; i <= BUCKETS_FOR_ZONE(zhdr.zone_bits); i++) {
333 tdb_off_t off, prev = 0, *p;
334 struct tdb_free_record f;
336 h = bucket_off(zone_off, i);
337 for (off = tdb_read_off(tdb, h); off; off = f.next) {
338 if (off == TDB_OFF_ERR)
340 if (tdb_read_convert(tdb, off, &f, sizeof(f)))
342 if (!check_free(tdb, off, &f, prev, zone_off, i))
345 /* FIXME: Check hash bits */
346 p = asearch(&off, free, num_free, off_cmp);
348 tdb->log(tdb, TDB_DEBUG_ERROR,
350 "tdb_check: Invalid offset"
351 " %llu in free table\n",
355 /* Mark it invalid. */
361 return 1ULL << zhdr.zone_bits;
364 static tdb_off_t check_zone(struct tdb_context *tdb, tdb_off_t zone_off,
365 tdb_off_t **used, size_t *num_used,
366 tdb_off_t **free, size_t *num_free,
367 unsigned int *max_zone_bits)
369 struct free_zone_header zhdr;
370 tdb_off_t off, hdrlen;
373 if (tdb_read_convert(tdb, zone_off, &zhdr, sizeof(zhdr)) == -1)
376 if (zhdr.zone_bits < INITIAL_ZONE_BITS) {
377 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
378 "check: bad zone_bits %llu at zone %llu\n",
379 (long long)zhdr.zone_bits, (long long)zone_off);
383 /* Zone bits can only increase... */
384 if (zhdr.zone_bits > *max_zone_bits)
385 *max_zone_bits = zhdr.zone_bits;
386 else if (zhdr.zone_bits < *max_zone_bits) {
387 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
388 "check: small zone_bits %llu at zone %llu\n",
389 (long long)zhdr.zone_bits, (long long)zone_off);
393 /* Zone must be within file! */
394 if (tdb->methods->oob(tdb, zone_off + (1ULL << zhdr.zone_bits), false))
397 hdrlen = sizeof(zhdr)
398 + (BUCKETS_FOR_ZONE(zhdr.zone_bits) + 1) * sizeof(tdb_off_t);
399 for (off = zone_off + hdrlen;
400 off < zone_off + (1ULL << zhdr.zone_bits);
403 struct tdb_used_record u;
404 struct tdb_free_record f;
406 p = tdb_get(tdb, off, &pad, sizeof(pad));
409 if (frec_magic(&p->f) == TDB_FREE_MAGIC) {
410 if (frec_zone_bits(&p->f) != zhdr.zone_bits) {
411 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
412 "tdb_check: Bad free zone bits %u"
414 frec_zone_bits(&p->f),
418 /* This record is free! */
419 if (!append(free, num_free, off))
421 len = sizeof(p->u) + p->f.data_len;
422 if (off + len > zone_off + (1ULL << zhdr.zone_bits)) {
423 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
424 "tdb_check: free overlength %llu"
426 (long long)len, (long long)off);
430 uint64_t klen, dlen, extra;
432 /* This record is used! */
433 if (rec_magic(&p->u) != TDB_MAGIC) {
434 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
435 "tdb_check: Bad magic 0x%llx"
437 (long long)rec_magic(&p->u),
442 if (rec_zone_bits(&p->u) != zhdr.zone_bits) {
443 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
444 "tdb_check: Bad zone bits %u"
446 rec_zone_bits(&p->u),
451 if (!append(used, num_used, off))
454 klen = rec_key_length(&p->u);
455 dlen = rec_data_length(&p->u);
456 extra = rec_extra_padding(&p->u);
458 len = sizeof(p->u) + klen + dlen + extra;
459 if (off + len > zone_off + (1ULL << zhdr.zone_bits)) {
460 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
461 "tdb_check: used overlength %llu"
463 (long long)len, (long long)off);
467 if (len < sizeof(p->f)) {
468 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
469 "tdb_check: too short record %llu at"
471 (long long)len, (long long)off);
476 return 1ULL << zhdr.zone_bits;
479 /* FIXME: call check() function. */
480 int tdb_check(struct tdb_context *tdb,
481 int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
484 tdb_off_t *free = NULL, *used = NULL, off;
486 size_t num_free = 0, num_used = 0, num_found = 0;
487 unsigned max_zone_bits = INITIAL_ZONE_BITS;
490 if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false) != 0)
493 if (tdb_lock_expand(tdb, F_RDLCK) != 0) {
494 tdb_allrecord_unlock(tdb, F_RDLCK);
498 if (!check_header(tdb))
501 /* First we do a linear scan, checking all records. */
502 for (off = sizeof(struct tdb_header);
503 off < tdb->map_size - 1;
505 len = check_zone(tdb, off, &used, &num_used, &free, &num_free,
507 if (len == TDB_OFF_ERR)
512 if (tdb->methods->read(tdb, tdb->map_size - 1, &tailer, 1) == -1)
514 if (tailer != max_zone_bits) {
515 tdb->ecode = TDB_ERR_CORRUPT;
516 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
517 "tdb_check: Bad tailer value %u vs %u\n", tailer,
522 /* FIXME: Check key uniqueness? */
523 if (!check_hash(tdb, used, num_used))
526 for (off = sizeof(struct tdb_header);
527 off < tdb->map_size - 1;
529 len = check_free_list(tdb, off, free, num_free, &num_found);
530 if (len == TDB_OFF_ERR)
533 if (num_found != num_free) {
534 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
535 "tdb_check: Not all entries are in free table\n");
539 tdb_allrecord_unlock(tdb, F_RDLCK);
540 tdb_unlock_expand(tdb, F_RDLCK);
544 tdb_allrecord_unlock(tdb, F_RDLCK);
545 tdb_unlock_expand(tdb, F_RDLCK);