]> git.ozlabs.org Git - ccan/blob - ccan/tdb2/check.c
tdb2: rework io functions to return enum TDB_ERROR.
[ccan] / ccan / tdb2 / check.c
1  /*
2    Trivial Database 2: database consistency checking
3    Copyright (C) Rusty Russell 2010
4
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 3 of the License, or (at your option) any later version.
9
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with this library; if not, see <http://www.gnu.org/licenses/>.
17 */
18 #include "private.h"
19 #include <ccan/likely/likely.h>
20 #include <ccan/asearch/asearch.h>
21
22 /* We keep an ordered array of offsets. */
23 static bool append(tdb_off_t **arr, size_t *num, tdb_off_t off)
24 {
25         tdb_off_t *new = realloc(*arr, (*num + 1) * sizeof(tdb_off_t));
26         if (!new)
27                 return false;
28         new[(*num)++] = off;
29         *arr = new;
30         return true;
31 }
32
33 static bool check_header(struct tdb_context *tdb, tdb_off_t *recovery)
34 {
35         uint64_t hash_test;
36         struct tdb_header hdr;
37
38         if (tdb_read_convert(tdb, 0, &hdr, sizeof(hdr)) == -1)
39                 return false;
40         /* magic food should not be converted, so convert back. */
41         tdb_convert(tdb, hdr.magic_food, sizeof(hdr.magic_food));
42
43         hash_test = TDB_HASH_MAGIC;
44         hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test));
45         if (hdr.hash_test != hash_test) {
46                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
47                            "check: hash test %llu should be %llu",
48                            (long long)hdr.hash_test,
49                            (long long)hash_test);
50                 return false;
51         }
52
53         if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) {
54                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
55                            "check: bad magic '%.*s'",
56                            (unsigned)sizeof(hdr.magic_food), hdr.magic_food);
57                 return false;
58         }
59
60         *recovery = hdr.recovery;
61         if (*recovery) {
62                 if (*recovery < sizeof(hdr) || *recovery > tdb->map_size) {
63                         tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
64                                  "tdb_check: invalid recovery offset %zu",
65                                  (size_t)*recovery);
66                         return false;
67                 }
68         }
69
70         /* Don't check reserved: they *can* be used later. */
71         return true;
72 }
73
74 static bool check_hash_tree(struct tdb_context *tdb,
75                             tdb_off_t off, unsigned int group_bits,
76                             uint64_t hprefix,
77                             unsigned hprefix_bits,
78                             tdb_off_t used[],
79                             size_t num_used,
80                             size_t *num_found,
81                             int (*check)(TDB_DATA, TDB_DATA, void *),
82                             void *private_data);
83
84 static bool check_hash_chain(struct tdb_context *tdb,
85                              tdb_off_t off,
86                              uint64_t hash,
87                              tdb_off_t used[],
88                              size_t num_used,
89                              size_t *num_found,
90                              int (*check)(TDB_DATA, TDB_DATA, void *),
91                              void *private_data)
92 {
93         struct tdb_used_record rec;
94
95         if (tdb_read_convert(tdb, off, &rec, sizeof(rec)) == -1)
96                 return false;
97
98         if (rec_magic(&rec) != TDB_CHAIN_MAGIC) {
99                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
100                            "tdb_check: Bad hash chain magic %llu",
101                            (long long)rec_magic(&rec));
102                 return false;
103         }
104
105         if (rec_data_length(&rec) != sizeof(struct tdb_chain)) {
106                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
107                            "tdb_check: Bad hash chain length %llu vs %zu",
108                            (long long)rec_data_length(&rec),
109                            sizeof(struct tdb_chain));
110                 return false;
111         }
112         if (rec_key_length(&rec) != 0) {
113                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
114                          "tdb_check: Bad hash chain key length %llu",
115                          (long long)rec_key_length(&rec));
116                 return false;
117         }
118         if (rec_hash(&rec) != 0) {
119                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
120                          "tdb_check: Bad hash chain hash value %llu",
121                          (long long)rec_hash(&rec));
122                 return false;
123         }
124
125         off += sizeof(rec);
126         if (!check_hash_tree(tdb, off, 0, hash, 64,
127                              used, num_used, num_found, check, private_data))
128                 return false;
129
130         off = tdb_read_off(tdb, off + offsetof(struct tdb_chain, next));
131         if (off == TDB_OFF_ERR)
132                 return false;
133         if (off == 0)
134                 return true;
135         (*num_found)++;
136         return check_hash_chain(tdb, off, hash, used, num_used, num_found,
137                                 check, private_data);
138 }
139
140 static bool check_hash_record(struct tdb_context *tdb,
141                               tdb_off_t off,
142                               uint64_t hprefix,
143                               unsigned hprefix_bits,
144                               tdb_off_t used[],
145                               size_t num_used,
146                               size_t *num_found,
147                               int (*check)(TDB_DATA, TDB_DATA, void *),
148                               void *private_data)
149 {
150         struct tdb_used_record rec;
151
152         if (hprefix_bits >= 64)
153                 return check_hash_chain(tdb, off, hprefix, used, num_used,
154                                         num_found, check, private_data);
155
156         if (tdb_read_convert(tdb, off, &rec, sizeof(rec)) == -1)
157                 return false;
158
159         if (rec_magic(&rec) != TDB_HTABLE_MAGIC) {
160                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
161                            "tdb_check: Bad hash table magic %llu",
162                            (long long)rec_magic(&rec));
163                 return false;
164         }
165         if (rec_data_length(&rec)
166             != sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) {
167                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
168                            "tdb_check: Bad hash table length %llu vs %llu",
169                            (long long)rec_data_length(&rec),
170                            (long long)sizeof(tdb_off_t)
171                            << TDB_SUBLEVEL_HASH_BITS);
172                 return false;
173         }
174         if (rec_key_length(&rec) != 0) {
175                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
176                          "tdb_check: Bad hash table key length %llu",
177                          (long long)rec_key_length(&rec));
178                 return false;
179         }
180         if (rec_hash(&rec) != 0) {
181                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
182                          "tdb_check: Bad hash table hash value %llu",
183                          (long long)rec_hash(&rec));
184                 return false;
185         }
186
187         off += sizeof(rec);
188         return check_hash_tree(tdb, off,
189                                TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
190                                hprefix, hprefix_bits,
191                                used, num_used, num_found, check, private_data);
192 }
193
194 static int off_cmp(const tdb_off_t *a, const tdb_off_t *b)
195 {
196         /* Can overflow an int. */
197         return *a > *b ? 1
198                 : *a < *b ? -1
199                 : 0;
200 }
201
/*
 * Extract the next num bits of the 64-bit hash h, working from the most
 * significant end.
 *
 * *used is the number of bits already consumed; it is advanced by num and
 * the bits just consumed are returned right-aligned.  The caller must keep
 * *used at or below 64.  Asking for zero bits returns 0.
 */
static uint64_t get_bits(uint64_t h, unsigned num, unsigned *used)
{
	uint64_t mask;

	*used += num;

	/* Nothing requested: avoid shifting by the full width (undefined)
	 * when nothing has been consumed yet. */
	if (num == 0)
		return 0;

	/* Build the mask in 64 bits; a 32-bit "1U << num" is undefined and
	 * too narrow once num reaches 32. */
	mask = num >= 64 ? ~(uint64_t)0 : (((uint64_t)1 << num) - 1);

	return (h >> (64 - *used)) & mask;
}
208
209 static bool check_hash_tree(struct tdb_context *tdb,
210                             tdb_off_t off, unsigned int group_bits,
211                             uint64_t hprefix,
212                             unsigned hprefix_bits,
213                             tdb_off_t used[],
214                             size_t num_used,
215                             size_t *num_found,
216                             int (*check)(TDB_DATA, TDB_DATA, void *),
217                             void *private_data)
218 {
219         unsigned int g, b;
220         const tdb_off_t *hash;
221         struct tdb_used_record rec;
222
223         hash = tdb_access_read(tdb, off,
224                                sizeof(tdb_off_t)
225                                << (group_bits + TDB_HASH_GROUP_BITS),
226                                true);
227         if (!hash)
228                 return false;
229
230         for (g = 0; g < (1 << group_bits); g++) {
231                 const tdb_off_t *group = hash + (g << TDB_HASH_GROUP_BITS);
232                 for (b = 0; b < (1 << TDB_HASH_GROUP_BITS); b++) {
233                         unsigned int bucket, i, used_bits;
234                         uint64_t h;
235                         tdb_off_t *p;
236                         if (group[b] == 0)
237                                 continue;
238
239                         off = group[b] & TDB_OFF_MASK;
240                         p = asearch(&off, used, num_used, off_cmp);
241                         if (!p) {
242                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
243                                            TDB_LOG_ERROR,
244                                            "tdb_check: Invalid offset %llu "
245                                            "in hash", (long long)off);
246                                 goto fail;
247                         }
248                         /* Mark it invalid. */
249                         *p ^= 1;
250                         (*num_found)++;
251
252                         if (hprefix_bits == 64) {
253                                 /* Chained entries are unordered. */
254                                 if (is_subhash(group[b])) {
255                                         tdb_logerr(tdb, TDB_ERR_CORRUPT,
256                                                    TDB_LOG_ERROR,
257                                                    "tdb_check: Invalid chain"
258                                                    " entry subhash");
259                                         goto fail;
260                                 }
261                                 h = hash_record(tdb, off);
262                                 if (h != hprefix) {
263                                         tdb_logerr(tdb, TDB_ERR_CORRUPT,
264                                                    TDB_LOG_ERROR,
265                                                    "check: bad hash chain"
266                                                    " placement"
267                                                    " 0x%llx vs 0x%llx",
268                                                    (long long)h,
269                                                    (long long)hprefix);
270                                         goto fail;
271                                 }
272                                 if (tdb_read_convert(tdb, off, &rec,
273                                                      sizeof(rec)))
274                                         goto fail;
275                                 goto check;
276                         }
277
278                         if (is_subhash(group[b])) {
279                                 uint64_t subprefix;
280                                 subprefix = (hprefix
281                                      << (group_bits + TDB_HASH_GROUP_BITS))
282                                         + g * (1 << TDB_HASH_GROUP_BITS) + b;
283
284                                 if (!check_hash_record(tdb,
285                                                group[b] & TDB_OFF_MASK,
286                                                subprefix,
287                                                hprefix_bits
288                                                        + group_bits
289                                                        + TDB_HASH_GROUP_BITS,
290                                                used, num_used, num_found,
291                                                check, private_data))
292                                         goto fail;
293                                 continue;
294                         }
295                         /* A normal entry */
296
297                         /* Does it belong here at all? */
298                         h = hash_record(tdb, off);
299                         used_bits = 0;
300                         if (get_bits(h, hprefix_bits, &used_bits) != hprefix
301                             && hprefix_bits) {
302                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
303                                            TDB_LOG_ERROR,
304                                            "check: bad hash placement"
305                                            " 0x%llx vs 0x%llx",
306                                          (long long)h, (long long)hprefix);
307                                 goto fail;
308                         }
309
310                         /* Does it belong in this group? */
311                         if (get_bits(h, group_bits, &used_bits) != g) {
312                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
313                                            TDB_LOG_ERROR,
314                                            "check: bad group %llu vs %u",
315                                            (long long)h, g);
316                                 goto fail;
317                         }
318
319                         /* Are bucket bits correct? */
320                         bucket = group[b] & TDB_OFF_HASH_GROUP_MASK;
321                         if (get_bits(h, TDB_HASH_GROUP_BITS, &used_bits)
322                             != bucket) {
323                                 used_bits -= TDB_HASH_GROUP_BITS;
324                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
325                                            TDB_LOG_ERROR,
326                                          "check: bad bucket %u vs %u",
327                                          (unsigned)get_bits(h,
328                                                         TDB_HASH_GROUP_BITS,
329                                                         &used_bits),
330                                          bucket);
331                                 goto fail;
332                         }
333
334                         /* There must not be any zero entries between
335                          * the bucket it belongs in and this one! */
336                         for (i = bucket;
337                              i != b;
338                              i = (i + 1) % (1 << TDB_HASH_GROUP_BITS)) {
339                                 if (group[i] == 0) {
340                                         tdb_logerr(tdb, TDB_ERR_CORRUPT,
341                                                    TDB_LOG_ERROR,
342                                                    "check: bad group placement"
343                                                    " %u vs %u",
344                                                    b, bucket);
345                                         goto fail;
346                                 }
347                         }
348
349                         if (tdb_read_convert(tdb, off, &rec, sizeof(rec)))
350                                 goto fail;
351
352                         /* Bottom bits must match header. */
353                         if ((h & ((1 << 11)-1)) != rec_hash(&rec)) {
354                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
355                                            TDB_LOG_ERROR,
356                                            "tdb_check: Bad hash magic at"
357                                            " offset %llu (0x%llx vs 0x%llx)",
358                                            (long long)off,
359                                            (long long)h,
360                                            (long long)rec_hash(&rec));
361                                 goto fail;
362                         }
363
364                 check:
365                         if (check) {
366                                 TDB_DATA key, data;
367                                 key.dsize = rec_key_length(&rec);
368                                 data.dsize = rec_data_length(&rec);
369                                 key.dptr = (void *)tdb_access_read(tdb,
370                                                    off + sizeof(rec),
371                                                    key.dsize + data.dsize,
372                                                    false);
373                                 if (!key.dptr)
374                                         goto fail;
375                                 data.dptr = key.dptr + key.dsize;
376                                 if (check(key, data, private_data) != 0)
377                                         goto fail;
378                                 tdb_access_release(tdb, key.dptr);
379                         }
380                 }
381         }
382         tdb_access_release(tdb, hash);
383         return true;
384
385 fail:
386         tdb_access_release(tdb, hash);
387         return false;
388 }
389
390 static bool check_hash(struct tdb_context *tdb,
391                        tdb_off_t used[],
392                        size_t num_used, size_t num_ftables,
393                        int (*check)(TDB_DATA, TDB_DATA, void *),
394                        void *private_data)
395 {
396         /* Free tables also show up as used. */
397         size_t num_found = num_ftables;
398
399         if (!check_hash_tree(tdb, offsetof(struct tdb_header, hashtable),
400                              TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
401                              0, 0, used, num_used, &num_found,
402                              check, private_data))
403                 return false;
404
405         if (num_found != num_used) {
406                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
407                            "tdb_check: Not all entries are in hash");
408                 return false;
409         }
410         return true;
411 }
412
413 static bool check_free(struct tdb_context *tdb,
414                        tdb_off_t off,
415                        const struct tdb_free_record *frec,
416                        tdb_off_t prev, unsigned int ftable,
417                        unsigned int bucket)
418 {
419         enum TDB_ERROR ecode;
420
421         if (frec_magic(frec) != TDB_FREE_MAGIC) {
422                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
423                            "tdb_check: offset %llu bad magic 0x%llx",
424                            (long long)off, (long long)frec->magic_and_prev);
425                 return false;
426         }
427         if (frec_ftable(frec) != ftable) {
428                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
429                            "tdb_check: offset %llu bad freetable %u",
430                            (long long)off, frec_ftable(frec));
431                 return false;
432         }
433
434         ecode = tdb->methods->oob(tdb, off
435                                   + frec_len(frec)
436                                   + sizeof(struct tdb_used_record),
437                                   false);
438         if (ecode != TDB_SUCCESS) {
439                 tdb->ecode = ecode;
440                 return false;
441         }
442         if (size_to_bucket(frec_len(frec)) != bucket) {
443                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
444                            "tdb_check: offset %llu in wrong bucket %u vs %u",
445                            (long long)off,
446                            bucket, size_to_bucket(frec_len(frec)));
447                 return false;
448         }
449         if (prev != frec_prev(frec)) {
450                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
451                            "tdb_check: offset %llu bad prev %llu vs %llu",
452                            (long long)off,
453                            (long long)prev, (long long)frec_len(frec));
454                 return false;
455         }
456         return true;
457 }
458
459 static bool check_free_table(struct tdb_context *tdb,
460                              tdb_off_t ftable_off,
461                              unsigned ftable_num,
462                              tdb_off_t fr[],
463                              size_t num_free,
464                              size_t *num_found)
465 {
466         struct tdb_freetable ft;
467         tdb_off_t h;
468         unsigned int i;
469
470         if (tdb_read_convert(tdb, ftable_off, &ft, sizeof(ft)) == -1)
471                 return false;
472
473         if (rec_magic(&ft.hdr) != TDB_FTABLE_MAGIC
474             || rec_key_length(&ft.hdr) != 0
475             || rec_data_length(&ft.hdr) != sizeof(ft) - sizeof(ft.hdr)
476             || rec_hash(&ft.hdr) != 0) {
477                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
478                            "tdb_check: Invalid header on free table");
479                 return false;
480         }
481
482         for (i = 0; i < TDB_FREE_BUCKETS; i++) {
483                 tdb_off_t off, prev = 0, *p;
484                 struct tdb_free_record f;
485
486                 h = bucket_off(ftable_off, i);
487                 for (off = tdb_read_off(tdb, h); off; off = f.next) {
488                         if (off == TDB_OFF_ERR)
489                                 return false;
490                         if (tdb_read_convert(tdb, off, &f, sizeof(f)))
491                                 return false;
492                         if (!check_free(tdb, off, &f, prev, ftable_num, i))
493                                 return false;
494
495                         /* FIXME: Check hash bits */
496                         p = asearch(&off, fr, num_free, off_cmp);
497                         if (!p) {
498                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
499                                            TDB_LOG_ERROR,
500                                            "tdb_check: Invalid offset"
501                                            " %llu in free table",
502                                            (long long)off);
503                                 return false;
504                         }
505                         /* Mark it invalid. */
506                         *p ^= 1;
507                         (*num_found)++;
508                         prev = off;
509                 }
510         }
511         return true;
512 }
513
514 /* Slow, but should be very rare. */
515 size_t dead_space(struct tdb_context *tdb, tdb_off_t off)
516 {
517         size_t len;
518         enum TDB_ERROR ecode;
519
520         for (len = 0; off + len < tdb->map_size; len++) {
521                 char c;
522                 ecode = tdb->methods->tread(tdb, off, &c, 1);
523                 if (ecode != TDB_SUCCESS) {
524                         tdb->ecode = ecode;
525                         return 0;
526                 }
527                 if (c != 0 && c != 0x43)
528                         break;
529         }
530         return len;
531 }
532
533 static bool check_linear(struct tdb_context *tdb,
534                          tdb_off_t **used, size_t *num_used,
535                          tdb_off_t **fr, size_t *num_free,
536                          tdb_off_t recovery)
537 {
538         tdb_off_t off;
539         tdb_len_t len;
540         bool found_recovery = false;
541
542         for (off = sizeof(struct tdb_header); off < tdb->map_size; off += len) {
543                 union {
544                         struct tdb_used_record u;
545                         struct tdb_free_record f;
546                         struct tdb_recovery_record r;
547                 } rec;
548                 /* r is larger: only get that if we need to. */
549                 if (tdb_read_convert(tdb, off, &rec, sizeof(rec.f)) == -1)
550                         return false;
551
552                 /* If we crash after ftruncate, we can get zeroes or fill. */
553                 if (rec.r.magic == TDB_RECOVERY_INVALID_MAGIC
554                     || rec.r.magic ==  0x4343434343434343ULL) {
555                         if (tdb_read_convert(tdb, off, &rec, sizeof(rec.r)))
556                                 return false;
557
558                         if (recovery == off) {
559                                 found_recovery = true;
560                                 len = sizeof(rec.r) + rec.r.max_len;
561                         } else {
562                                 len = dead_space(tdb, off);
563                                 if (len < sizeof(rec.r)) {
564                                         tdb_logerr(tdb, TDB_ERR_CORRUPT,
565                                                    TDB_LOG_ERROR,
566                                                    "tdb_check: invalid dead"
567                                                    " space at %zu",
568                                                    (size_t)off);
569                                         return false;
570                                 }
571
572                                 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
573                                            "Dead space at %zu-%zu (of %zu)",
574                                            (size_t)off, (size_t)(off + len),
575                                            (size_t)tdb->map_size);
576                         }
577                 } else if (rec.r.magic == TDB_RECOVERY_MAGIC) {
578                         if (tdb_read_convert(tdb, off, &rec, sizeof(rec.r)))
579                                 return false;
580                         if (recovery != off) {
581                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
582                                            TDB_LOG_ERROR,
583                                            "tdb_check: unexpected recovery"
584                                            " record at offset %zu",
585                                            (size_t)off);
586                                 return false;
587                         }
588                         if (rec.r.len > rec.r.max_len) {
589                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
590                                            TDB_LOG_ERROR,
591                                            "tdb_check: invalid recovery length"
592                                            " %zu", (size_t)rec.r.len);
593                                 return false;
594                         }
595                         if (rec.r.eof > tdb->map_size) {
596                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
597                                            TDB_LOG_ERROR,
598                                            "tdb_check: invalid old EOF"
599                                            " %zu", (size_t)rec.r.eof);
600                                 return false;
601                         }
602                         found_recovery = true;
603                         len = sizeof(rec.r) + rec.r.max_len;
604                 } else if (frec_magic(&rec.f) == TDB_FREE_MAGIC) {
605                         len = sizeof(rec.u) + frec_len(&rec.f);
606                         if (off + len > tdb->map_size) {
607                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
608                                            TDB_LOG_ERROR,
609                                            "tdb_check: free overlength %llu"
610                                            " at offset %llu",
611                                            (long long)len, (long long)off);
612                                 return false;
613                         }
614                         /* This record should be in free lists. */
615                         if (frec_ftable(&rec.f) != TDB_FTABLE_NONE
616                             && !append(fr, num_free, off)) {
617                                 tdb_logerr(tdb, TDB_ERR_OOM,
618                                            TDB_LOG_ERROR,
619                                            "tdb_check: tracking %zu'th"
620                                            " free record.", *num_free);
621                                 return false;
622                         }
623                 } else if (rec_magic(&rec.u) == TDB_USED_MAGIC
624                            || rec_magic(&rec.u) == TDB_CHAIN_MAGIC
625                            || rec_magic(&rec.u) == TDB_HTABLE_MAGIC
626                            || rec_magic(&rec.u) == TDB_FTABLE_MAGIC) {
627                         uint64_t klen, dlen, extra;
628
629                         /* This record is used! */
630                         if (!append(used, num_used, off)) {
631                                 tdb_logerr(tdb, TDB_ERR_OOM,
632                                            TDB_LOG_ERROR,
633                                            "tdb_check: tracking %zu'th"
634                                            " used record.", *num_used);
635                                 return false;
636                         }
637
638                         klen = rec_key_length(&rec.u);
639                         dlen = rec_data_length(&rec.u);
640                         extra = rec_extra_padding(&rec.u);
641
642                         len = sizeof(rec.u) + klen + dlen + extra;
643                         if (off + len > tdb->map_size) {
644                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
645                                            TDB_LOG_ERROR,
646                                            "tdb_check: used overlength %llu"
647                                            " at offset %llu",
648                                            (long long)len, (long long)off);
649                                 return false;
650                         }
651
652                         if (len < sizeof(rec.f)) {
653                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
654                                            TDB_LOG_ERROR,
655                                            "tdb_check: too short record %llu"
656                                            " at %llu",
657                                            (long long)len, (long long)off);
658                                 return false;
659                         }
660                 } else {
661                         tdb_logerr(tdb, TDB_ERR_CORRUPT,
662                                    TDB_LOG_ERROR,
663                                    "tdb_check: Bad magic 0x%llx at offset %zu",
664                                    (long long)rec_magic(&rec.u), (size_t)off);
665                         return false;
666                 }
667         }
668
669         /* We must have found recovery area if there was one. */
670         if (recovery != 0 && !found_recovery) {
671                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
672                            "tdb_check: expected a recovery area at %zu",
673                            (size_t)recovery);
674                 return false;
675         }
676
677         return true;
678 }
679
680 int tdb_check(struct tdb_context *tdb,
681               int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
682               void *private_data)
683 {
684         tdb_off_t *fr = NULL, *used = NULL, ft, recovery;
685         size_t num_free = 0, num_used = 0, num_found = 0, num_ftables = 0;
686         enum TDB_ERROR ecode;
687
688         ecode = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
689         if (ecpde != TDB_SUCCESS) {
690                 tdb->ecode = ecode;
691                 return -1;
692         }
693
694         ecode = tdb_lock_expand(tdb, F_RDLCK);
695         if (ecode != TDB_SUCCESS) {
696                 tdb->ecode = ecode;
697                 tdb_allrecord_unlock(tdb, F_RDLCK);
698                 return -1;
699         }
700
701         if (!check_header(tdb, &recovery))
702                 goto fail;
703
704         /* First we do a linear scan, checking all records. */
705         if (!check_linear(tdb, &used, &num_used, &fr, &num_free, recovery))
706                 goto fail;
707
708         for (ft = first_ftable(tdb); ft; ft = next_ftable(tdb, ft)) {
709                 if (ft == TDB_OFF_ERR)
710                         goto fail;
711                 if (!check_free_table(tdb, ft, num_ftables, fr, num_free,
712                                       &num_found))
713                         goto fail;
714                 num_ftables++;
715         }
716
717         /* FIXME: Check key uniqueness? */
718         if (!check_hash(tdb, used, num_used, num_ftables, check, private_data))
719                 goto fail;
720
721         if (num_found != num_free) {
722                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
723                            "tdb_check: Not all entries are in free table");
724                 return -1;
725         }
726
727         tdb_allrecord_unlock(tdb, F_RDLCK);
728         tdb_unlock_expand(tdb, F_RDLCK);
729         free(fr);
730         free(used);
731         return 0;
732
733 fail:
734         free(fr);
735         free(used);
736         tdb_allrecord_unlock(tdb, F_RDLCK);
737         tdb_unlock_expand(tdb, F_RDLCK);
738         return -1;
739 }