tdb2: add write flag to tdb_direct
[ccan] / ccan / tdb2 / check.c
1  /* 
2    Trivial Database 2: database consistency checking
3    Copyright (C) Rusty Russell 2010
4    
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 3 of the License, or (at your option) any later version.
9
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with this library; if not, see <http://www.gnu.org/licenses/>.
17 */
18 #include "private.h"
19 #include <ccan/likely/likely.h>
20 #include <ccan/asearch/asearch.h>
21
22 /* We keep an ordered array of offsets. */
23 static bool append(tdb_off_t **arr, size_t *num, tdb_off_t off)
24 {
25         tdb_off_t *new = realloc(*arr, (*num + 1) * sizeof(tdb_off_t));
26         if (!new)
27                 return false;
28         new[(*num)++] = off;
29         *arr = new;
30         return true;
31 }
32
33 static bool check_header(struct tdb_context *tdb, tdb_off_t *recovery)
34 {
35         uint64_t hash_test;
36         struct tdb_header hdr;
37
38         if (tdb_read_convert(tdb, 0, &hdr, sizeof(hdr)) == -1)
39                 return false;
40         /* magic food should not be converted, so convert back. */
41         tdb_convert(tdb, hdr.magic_food, sizeof(hdr.magic_food));
42
43         hash_test = TDB_HASH_MAGIC;
44         hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test));
45         if (hdr.hash_test != hash_test) {
46                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
47                            "check: hash test %llu should be %llu",
48                            (long long)hdr.hash_test,
49                            (long long)hash_test);
50                 return false;
51         }
52
53         if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) {
54                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
55                            "check: bad magic '%.*s'",
56                            (unsigned)sizeof(hdr.magic_food), hdr.magic_food);
57                 return false;
58         }
59
60         *recovery = hdr.recovery;
61         if (*recovery) {
62                 if (*recovery < sizeof(hdr) || *recovery > tdb->map_size) {
63                         tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
64                                  "tdb_check: invalid recovery offset %zu",
65                                  (size_t)*recovery);
66                         return false;
67                 }
68         }
69
70         /* Don't check reserved: they *can* be used later. */
71         return true;
72 }
73
74 static bool check_hash_tree(struct tdb_context *tdb,
75                             tdb_off_t off, unsigned int group_bits,
76                             uint64_t hprefix,
77                             unsigned hprefix_bits,
78                             tdb_off_t used[],
79                             size_t num_used,
80                             size_t *num_found,
81                             int (*check)(TDB_DATA, TDB_DATA, void *),
82                             void *private_data);
83
84 static bool check_hash_chain(struct tdb_context *tdb,
85                              tdb_off_t off,
86                              uint64_t hash,
87                              tdb_off_t used[],
88                              size_t num_used,
89                              size_t *num_found,
90                              int (*check)(TDB_DATA, TDB_DATA, void *),
91                              void *private_data)
92 {
93         struct tdb_used_record rec;
94
95         if (tdb_read_convert(tdb, off, &rec, sizeof(rec)) == -1)
96                 return false;
97
98         if (rec_data_length(&rec) != sizeof(struct tdb_chain)) {
99                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
100                            "tdb_check: Bad hash chain length %llu vs %zu",
101                            (long long)rec_data_length(&rec),
102                            sizeof(struct tdb_chain));
103                 return false;
104         }
105         if (rec_key_length(&rec) != 0) {
106                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
107                          "tdb_check: Bad hash chain key length %llu",
108                          (long long)rec_key_length(&rec));
109                 return false;
110         }
111         if (rec_hash(&rec) != 2) {
112                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
113                          "tdb_check: Bad hash chain hash value %llu",
114                          (long long)rec_hash(&rec));
115                 return false;
116         }
117
118         off += sizeof(rec);
119         if (!check_hash_tree(tdb, off, 0, hash, 64,
120                              used, num_used, num_found, check, private_data))
121                 return false;
122
123         off = tdb_read_off(tdb, off + offsetof(struct tdb_chain, next));
124         if (off == TDB_OFF_ERR)
125                 return false;
126         if (off == 0)
127                 return true;
128         (*num_found)++;
129         return check_hash_chain(tdb, off, hash, used, num_used, num_found,
130                                 check, private_data);
131 }
132
133 static bool check_hash_record(struct tdb_context *tdb,
134                               tdb_off_t off,
135                               uint64_t hprefix,
136                               unsigned hprefix_bits,
137                               tdb_off_t used[],
138                               size_t num_used,
139                               size_t *num_found,
140                               int (*check)(TDB_DATA, TDB_DATA, void *),
141                               void *private_data)
142 {
143         struct tdb_used_record rec;
144
145         if (hprefix_bits >= 64)
146                 return check_hash_chain(tdb, off, hprefix, used, num_used,
147                                         num_found, check, private_data);
148
149         if (tdb_read_convert(tdb, off, &rec, sizeof(rec)) == -1)
150                 return false;
151
152         if (rec_data_length(&rec)
153             != sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) {
154                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
155                            "tdb_check: Bad hash table length %llu vs %llu",
156                            (long long)rec_data_length(&rec),
157                            (long long)sizeof(tdb_off_t)
158                            << TDB_SUBLEVEL_HASH_BITS);
159                 return false;
160         }
161         if (rec_key_length(&rec) != 0) {
162                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
163                          "tdb_check: Bad hash table key length %llu",
164                          (long long)rec_key_length(&rec));
165                 return false;
166         }
167         if (rec_hash(&rec) != 0) {
168                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
169                          "tdb_check: Bad hash table hash value %llu",
170                          (long long)rec_hash(&rec));
171                 return false;
172         }
173
174         off += sizeof(rec);
175         return check_hash_tree(tdb, off,
176                                TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
177                                hprefix, hprefix_bits,
178                                used, num_used, num_found, check, private_data);
179 }
180
181 static int off_cmp(const tdb_off_t *a, const tdb_off_t *b)
182 {
183         /* Can overflow an int. */
184         return *a > *b ? 1
185                 : *a < *b ? -1
186                 : 0;
187 }
188
/*
 * Extract the next num bits of the 64-bit hash h, counting from the most
 * significant end.
 *
 * *used is the number of leading bits already consumed; it is advanced by
 * num, and the num bits ending at the new position are returned
 * right-aligned.
 *
 * Asking for zero bits returns 0 (and never shifts by the full word width,
 * which would be undefined).  The mask is built in 64 bits so widths of 32
 * or more work.  Callers must keep *used + num <= 64.
 */
static uint64_t get_bits(uint64_t h, unsigned num, unsigned *used)
{
	uint64_t mask;

	*used += num;
	if (num == 0)
		return 0;

	/* 1 << 64 is undefined, so a full-width request gets all ones. */
	mask = num >= 64 ? ~(uint64_t)0 : (((uint64_t)1 << num) - 1);

	/* num > 0 implies *used >= 1, so the shift count is at most 63. */
	return (h >> (64 - *used)) & mask;
}
195
196 static bool check_hash_tree(struct tdb_context *tdb,
197                             tdb_off_t off, unsigned int group_bits,
198                             uint64_t hprefix,
199                             unsigned hprefix_bits,
200                             tdb_off_t used[],
201                             size_t num_used,
202                             size_t *num_found,
203                             int (*check)(TDB_DATA, TDB_DATA, void *),
204                             void *private_data)
205 {
206         unsigned int g, b;
207         const tdb_off_t *hash;
208         struct tdb_used_record rec;
209
210         hash = tdb_access_read(tdb, off,
211                                sizeof(tdb_off_t)
212                                << (group_bits + TDB_HASH_GROUP_BITS),
213                                true);
214         if (!hash)
215                 return false;
216
217         for (g = 0; g < (1 << group_bits); g++) {
218                 const tdb_off_t *group = hash + (g << TDB_HASH_GROUP_BITS);
219                 for (b = 0; b < (1 << TDB_HASH_GROUP_BITS); b++) {
220                         unsigned int bucket, i, used_bits;
221                         uint64_t h;
222                         tdb_off_t *p;
223                         if (group[b] == 0)
224                                 continue;
225
226                         off = group[b] & TDB_OFF_MASK;
227                         p = asearch(&off, used, num_used, off_cmp);
228                         if (!p) {
229                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
230                                            TDB_DEBUG_ERROR,
231                                            "tdb_check: Invalid offset %llu "
232                                            "in hash", (long long)off);
233                                 goto fail;
234                         }
235                         /* Mark it invalid. */
236                         *p ^= 1;
237                         (*num_found)++;
238
239                         if (hprefix_bits == 64) {
240                                 /* Chained entries are unordered. */
241                                 if (is_subhash(group[b])) {
242                                         tdb_logerr(tdb, TDB_ERR_CORRUPT,
243                                                    TDB_DEBUG_ERROR,
244                                                    "tdb_check: Invalid chain"
245                                                    " entry subhash");
246                                         goto fail;
247                                 }
248                                 h = hash_record(tdb, off);
249                                 if (h != hprefix) {
250                                         tdb_logerr(tdb, TDB_ERR_CORRUPT,
251                                                    TDB_DEBUG_ERROR,
252                                                    "check: bad hash chain"
253                                                    " placement"
254                                                    " 0x%llx vs 0x%llx",
255                                                    (long long)h,
256                                                    (long long)hprefix);
257                                         goto fail;
258                                 }
259                                 if (tdb_read_convert(tdb, off, &rec,
260                                                      sizeof(rec)))
261                                         goto fail;
262                                 goto check;
263                         }
264
265                         if (is_subhash(group[b])) {
266                                 uint64_t subprefix;
267                                 subprefix = (hprefix 
268                                      << (group_bits + TDB_HASH_GROUP_BITS))
269                                         + g * (1 << TDB_HASH_GROUP_BITS) + b;
270
271                                 if (!check_hash_record(tdb,
272                                                group[b] & TDB_OFF_MASK,
273                                                subprefix,
274                                                hprefix_bits
275                                                        + group_bits
276                                                        + TDB_HASH_GROUP_BITS,
277                                                used, num_used, num_found,
278                                                check, private_data))
279                                         goto fail;
280                                 continue;
281                         }
282                         /* A normal entry */
283
284                         /* Does it belong here at all? */
285                         h = hash_record(tdb, off);
286                         used_bits = 0;
287                         if (get_bits(h, hprefix_bits, &used_bits) != hprefix
288                             && hprefix_bits) {
289                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
290                                            TDB_DEBUG_ERROR,
291                                            "check: bad hash placement"
292                                            " 0x%llx vs 0x%llx",
293                                          (long long)h, (long long)hprefix);
294                                 goto fail;
295                         }
296
297                         /* Does it belong in this group? */
298                         if (get_bits(h, group_bits, &used_bits) != g) {
299                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
300                                            TDB_DEBUG_ERROR,
301                                            "check: bad group %llu vs %u",
302                                            (long long)h, g);
303                                 goto fail;
304                         }
305
306                         /* Are bucket bits correct? */
307                         bucket = group[b] & TDB_OFF_HASH_GROUP_MASK;
308                         if (get_bits(h, TDB_HASH_GROUP_BITS, &used_bits)
309                             != bucket) {
310                                 used_bits -= TDB_HASH_GROUP_BITS;
311                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
312                                            TDB_DEBUG_ERROR,
313                                          "check: bad bucket %u vs %u",
314                                          (unsigned)get_bits(h,
315                                                         TDB_HASH_GROUP_BITS,
316                                                         &used_bits),
317                                          bucket);
318                                 goto fail;
319                         }
320
321                         /* There must not be any zero entries between
322                          * the bucket it belongs in and this one! */
323                         for (i = bucket;
324                              i != b;
325                              i = (i + 1) % (1 << TDB_HASH_GROUP_BITS)) {
326                                 if (group[i] == 0) {
327                                         tdb_logerr(tdb, TDB_ERR_CORRUPT,
328                                                    TDB_DEBUG_ERROR,
329                                                    "check: bad group placement"
330                                                    " %u vs %u",
331                                                    b, bucket);
332                                         goto fail;
333                                 }
334                         }
335
336                         if (tdb_read_convert(tdb, off, &rec, sizeof(rec)))
337                                 goto fail;
338
339                         /* Bottom bits must match header. */
340                         if ((h & ((1 << 11)-1)) != rec_hash(&rec)) {
341                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
342                                            TDB_DEBUG_ERROR,
343                                            "tdb_check: Bad hash magic at"
344                                            " offset %llu (0x%llx vs 0x%llx)",
345                                            (long long)off,
346                                            (long long)h,
347                                            (long long)rec_hash(&rec));
348                                 goto fail;
349                         }
350
351                 check:
352                         if (check) {
353                                 TDB_DATA key, data;
354                                 key.dsize = rec_key_length(&rec);
355                                 data.dsize = rec_data_length(&rec);
356                                 key.dptr = (void *)tdb_access_read(tdb,
357                                                    off + sizeof(rec),
358                                                    key.dsize + data.dsize,
359                                                    false);
360                                 if (!key.dptr)
361                                         goto fail;
362                                 data.dptr = key.dptr + key.dsize;
363                                 if (check(key, data, private_data) != 0)
364                                         goto fail;
365                                 tdb_access_release(tdb, key.dptr);
366                         }
367                 }
368         }
369         tdb_access_release(tdb, hash);
370         return true;
371
372 fail:
373         tdb_access_release(tdb, hash);
374         return false;
375 }
376
377 static bool check_hash(struct tdb_context *tdb,
378                        tdb_off_t used[],
379                        size_t num_used, size_t num_flists,
380                        int (*check)(TDB_DATA, TDB_DATA, void *),
381                        void *private_data)
382 {
383         /* Free lists also show up as used. */
384         size_t num_found = num_flists;
385
386         if (!check_hash_tree(tdb, offsetof(struct tdb_header, hashtable),
387                              TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
388                              0, 0, used, num_used, &num_found,
389                              check, private_data))
390                 return false;
391
392         if (num_found != num_used) {
393                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
394                            "tdb_check: Not all entries are in hash");
395                 return false;
396         }
397         return true;
398 }
399
400 static bool check_free(struct tdb_context *tdb,
401                        tdb_off_t off,
402                        const struct tdb_free_record *frec,
403                        tdb_off_t prev, unsigned int flist, unsigned int bucket)
404 {
405         if (frec_magic(frec) != TDB_FREE_MAGIC) {
406                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
407                            "tdb_check: offset %llu bad magic 0x%llx",
408                            (long long)off, (long long)frec->magic_and_prev);
409                 return false;
410         }
411         if (frec_flist(frec) != flist) {
412                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
413                            "tdb_check: offset %llu bad freelist %u",
414                            (long long)off, frec_flist(frec));
415                 return false;
416         }
417
418         if (tdb->methods->oob(tdb, off
419                               + frec_len(frec) + sizeof(struct tdb_used_record),
420                               false))
421                 return false;
422         if (size_to_bucket(frec_len(frec)) != bucket) {
423                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
424                            "tdb_check: offset %llu in wrong bucket %u vs %u",
425                            (long long)off,
426                            bucket, size_to_bucket(frec_len(frec)));
427                 return false;
428         }
429         if (prev != frec_prev(frec)) {
430                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
431                            "tdb_check: offset %llu bad prev %llu vs %llu",
432                            (long long)off,
433                            (long long)prev, (long long)frec_len(frec));
434                 return false;
435         }
436         return true;
437 }
438                        
439 static bool check_free_list(struct tdb_context *tdb,
440                             tdb_off_t flist_off,
441                             unsigned flist_num,
442                             tdb_off_t free[],
443                             size_t num_free,
444                             size_t *num_found)
445 {
446         struct tdb_freelist flist;
447         tdb_off_t h;
448         unsigned int i;
449
450         if (tdb_read_convert(tdb, flist_off, &flist, sizeof(flist)) == -1)
451                 return false;
452
453         if (rec_magic(&flist.hdr) != TDB_MAGIC
454             || rec_key_length(&flist.hdr) != 0
455             || rec_data_length(&flist.hdr) != sizeof(flist) - sizeof(flist.hdr)
456             || rec_hash(&flist.hdr) != 1) {
457                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
458                            "tdb_check: Invalid header on free list");
459                 return false;
460         }
461
462         for (i = 0; i < TDB_FREE_BUCKETS; i++) {
463                 tdb_off_t off, prev = 0, *p;
464                 struct tdb_free_record f;
465
466                 h = bucket_off(flist_off, i);
467                 for (off = tdb_read_off(tdb, h); off; off = f.next) {
468                         if (off == TDB_OFF_ERR)
469                                 return false;
470                         if (tdb_read_convert(tdb, off, &f, sizeof(f)))
471                                 return false;
472                         if (!check_free(tdb, off, &f, prev, flist_num, i))
473                                 return false;
474
475                         /* FIXME: Check hash bits */
476                         p = asearch(&off, free, num_free, off_cmp);
477                         if (!p) {
478                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
479                                            TDB_DEBUG_ERROR,
480                                            "tdb_check: Invalid offset"
481                                            " %llu in free table",
482                                            (long long)off);
483                                 return false;
484                         }
485                         /* Mark it invalid. */
486                         *p ^= 1;
487                         (*num_found)++;
488                         prev = off;
489                 }
490         }
491         return true;
492 }
493
494 /* Slow, but should be very rare. */
495 size_t dead_space(struct tdb_context *tdb, tdb_off_t off)
496 {
497         size_t len;
498
499         for (len = 0; off + len < tdb->map_size; len++) {
500                 char c;
501                 if (tdb->methods->read(tdb, off, &c, 1))
502                         return 0;
503                 if (c != 0 && c != 0x43)
504                         break;
505         }
506         return len;
507 }
508
509 static bool check_linear(struct tdb_context *tdb,
510                          tdb_off_t **used, size_t *num_used,
511                          tdb_off_t **free, size_t *num_free,
512                          tdb_off_t recovery)
513 {
514         tdb_off_t off;
515         tdb_len_t len;
516         bool found_recovery = false;
517
518         for (off = sizeof(struct tdb_header); off < tdb->map_size; off += len) {
519                 union {
520                         struct tdb_used_record u;
521                         struct tdb_free_record f;
522                         struct tdb_recovery_record r;
523                 } rec;
524                 /* r is larger: only get that if we need to. */
525                 if (tdb_read_convert(tdb, off, &rec, sizeof(rec.f)) == -1)
526                         return false;
527
528                 /* If we crash after ftruncate, we can get zeroes or fill. */
529                 if (rec.r.magic == TDB_RECOVERY_INVALID_MAGIC
530                     || rec.r.magic ==  0x4343434343434343ULL) {
531                         if (tdb_read_convert(tdb, off, &rec, sizeof(rec.r)))
532                                 return false;
533
534                         if (recovery == off) {
535                                 found_recovery = true;
536                                 len = sizeof(rec.r) + rec.r.max_len;
537                         } else {
538                                 len = dead_space(tdb, off);
539                                 if (len < sizeof(rec.r)) {
540                                         tdb_logerr(tdb, TDB_ERR_CORRUPT,
541                                                    TDB_DEBUG_ERROR,
542                                                    "tdb_check: invalid dead"
543                                                    " space at %zu",
544                                                    (size_t)off);
545                                         return false;
546                                 }
547
548                                 tdb_logerr(tdb, TDB_SUCCESS, TDB_DEBUG_WARNING,
549                                            "Dead space at %zu-%zu (of %zu)",
550                                            (size_t)off, (size_t)(off + len),
551                                            (size_t)tdb->map_size);
552                         }
553                 } else if (rec.r.magic == TDB_RECOVERY_MAGIC) {
554                         if (tdb_read_convert(tdb, off, &rec, sizeof(rec.r)))
555                                 return false;
556                         if (recovery != off) {
557                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
558                                            TDB_DEBUG_ERROR,
559                                            "tdb_check: unexpected recovery"
560                                            " record at offset %zu",
561                                            (size_t)off);
562                                 return false;
563                         }
564                         if (rec.r.len > rec.r.max_len) {
565                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
566                                            TDB_DEBUG_ERROR,
567                                            "tdb_check: invalid recovery length"
568                                            " %zu", (size_t)rec.r.len);
569                                 return false;
570                         }
571                         if (rec.r.eof > tdb->map_size) {
572                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
573                                            TDB_DEBUG_ERROR,
574                                            "tdb_check: invalid old EOF"
575                                            " %zu", (size_t)rec.r.eof);
576                                 return false;
577                         }
578                         found_recovery = true;
579                         len = sizeof(rec.r) + rec.r.max_len;
580                 } else if (frec_magic(&rec.f) == TDB_FREE_MAGIC) {
581                         len = sizeof(rec.u) + frec_len(&rec.f);
582                         if (off + len > tdb->map_size) {
583                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
584                                            TDB_DEBUG_ERROR,
585                                            "tdb_check: free overlength %llu"
586                                            " at offset %llu",
587                                            (long long)len, (long long)off);
588                                 return false;
589                         }
590                         /* This record should be in free lists. */
591                         if (frec_flist(&rec.f) != TDB_FLIST_NONE
592                             && !append(free, num_free, off))
593                                 return false;
594                 } else {
595                         uint64_t klen, dlen, extra;
596
597                         /* This record is used! */
598                         if (rec_magic(&rec.u) != TDB_MAGIC) {
599                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
600                                            TDB_DEBUG_ERROR,
601                                            "tdb_check: Bad magic 0x%llx"
602                                            " at offset %zu",
603                                            (long long)rec_magic(&rec.u),
604                                            (size_t)off);
605                                 return false;
606                         }
607
608                         if (!append(used, num_used, off))
609                                 return false;
610
611                         klen = rec_key_length(&rec.u);
612                         dlen = rec_data_length(&rec.u);
613                         extra = rec_extra_padding(&rec.u);
614
615                         len = sizeof(rec.u) + klen + dlen + extra;
616                         if (off + len > tdb->map_size) {
617                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
618                                            TDB_DEBUG_ERROR,
619                                            "tdb_check: used overlength %llu"
620                                            " at offset %llu",
621                                            (long long)len, (long long)off);
622                                 return false;
623                         }
624
625                         if (len < sizeof(rec.f)) {
626                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
627                                            TDB_DEBUG_ERROR,
628                                            "tdb_check: too short record %llu"
629                                            " at %llu",
630                                            (long long)len, (long long)off);
631                                 return false;
632                         }
633                 }
634         }
635
636         /* We must have found recovery area if there was one. */
637         if (recovery != 0 && !found_recovery) {
638                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
639                            "tdb_check: expected a recovery area at %zu",
640                            (size_t)recovery);
641                 return false;
642         }
643
644         return true;
645 }
646
647 int tdb_check(struct tdb_context *tdb,
648               int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
649               void *private_data)
650 {
651         tdb_off_t *free = NULL, *used = NULL, flist, recovery;
652         size_t num_free = 0, num_used = 0, num_found = 0, num_flists = 0;
653
654         if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false) != 0)
655                 return -1;
656
657         if (tdb_lock_expand(tdb, F_RDLCK) != 0) {
658                 tdb_allrecord_unlock(tdb, F_RDLCK);
659                 return -1;
660         }
661
662         if (!check_header(tdb, &recovery))
663                 goto fail;
664
665         /* First we do a linear scan, checking all records. */
666         if (!check_linear(tdb, &used, &num_used, &free, &num_free, recovery))
667                 goto fail;
668
669         for (flist = first_flist(tdb); flist; flist = next_flist(tdb, flist)) {
670                 if (flist == TDB_OFF_ERR)
671                         goto fail;
672                 if (!check_free_list(tdb, flist, num_flists, free, num_free,
673                                      &num_found))
674                         goto fail;
675                 num_flists++;
676         }
677
678         /* FIXME: Check key uniqueness? */
679         if (!check_hash(tdb, used, num_used, num_flists, check, private_data))
680                 goto fail;
681
682         if (num_found != num_free) {
683                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_DEBUG_ERROR,
684                            "tdb_check: Not all entries are in free table");
685                 return -1;
686         }
687
688         tdb_allrecord_unlock(tdb, F_RDLCK);
689         tdb_unlock_expand(tdb, F_RDLCK);
690         return 0;
691
692 fail:
693         tdb_allrecord_unlock(tdb, F_RDLCK);
694         tdb_unlock_expand(tdb, F_RDLCK);
695         return -1;
696 }