]> git.ozlabs.org Git - ccan/blob - ccan/tdb2/check.c
tdb2: rework some io.c functions to encode errors in their pointer returns.
[ccan] / ccan / tdb2 / check.c
1  /*
2    Trivial Database 2: free list/block handling
3    Copyright (C) Rusty Russell 2010
4
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 3 of the License, or (at your option) any later version.
9
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with this library; if not, see <http://www.gnu.org/licenses/>.
17 */
18 #include "private.h"
19 #include <ccan/likely/likely.h>
20 #include <ccan/asearch/asearch.h>
21
22 /* We keep an ordered array of offsets. */
23 static bool append(tdb_off_t **arr, size_t *num, tdb_off_t off)
24 {
25         tdb_off_t *new = realloc(*arr, (*num + 1) * sizeof(tdb_off_t));
26         if (!new)
27                 return false;
28         new[(*num)++] = off;
29         *arr = new;
30         return true;
31 }
32
33 static bool check_header(struct tdb_context *tdb, tdb_off_t *recovery)
34 {
35         uint64_t hash_test;
36         struct tdb_header hdr;
37         enum TDB_ERROR ecode;
38
39         ecode = tdb_read_convert(tdb, 0, &hdr, sizeof(hdr));
40         if (ecode != TDB_SUCCESS) {
41                 tdb->ecode = ecode;
42                 return false;
43         }
44         /* magic food should not be converted, so convert back. */
45         tdb_convert(tdb, hdr.magic_food, sizeof(hdr.magic_food));
46
47         hash_test = TDB_HASH_MAGIC;
48         hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test));
49         if (hdr.hash_test != hash_test) {
50                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
51                            "check: hash test %llu should be %llu",
52                            (long long)hdr.hash_test,
53                            (long long)hash_test);
54                 return false;
55         }
56
57         if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) {
58                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
59                            "check: bad magic '%.*s'",
60                            (unsigned)sizeof(hdr.magic_food), hdr.magic_food);
61                 return false;
62         }
63
64         *recovery = hdr.recovery;
65         if (*recovery) {
66                 if (*recovery < sizeof(hdr) || *recovery > tdb->map_size) {
67                         tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
68                                  "tdb_check: invalid recovery offset %zu",
69                                  (size_t)*recovery);
70                         return false;
71                 }
72         }
73
74         /* Don't check reserved: they *can* be used later. */
75         return true;
76 }
77
78 static bool check_hash_tree(struct tdb_context *tdb,
79                             tdb_off_t off, unsigned int group_bits,
80                             uint64_t hprefix,
81                             unsigned hprefix_bits,
82                             tdb_off_t used[],
83                             size_t num_used,
84                             size_t *num_found,
85                             int (*check)(TDB_DATA, TDB_DATA, void *),
86                             void *private_data);
87
88 static bool check_hash_chain(struct tdb_context *tdb,
89                              tdb_off_t off,
90                              uint64_t hash,
91                              tdb_off_t used[],
92                              size_t num_used,
93                              size_t *num_found,
94                              int (*check)(TDB_DATA, TDB_DATA, void *),
95                              void *private_data)
96 {
97         struct tdb_used_record rec;
98         enum TDB_ERROR ecode;
99
100         ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
101         if (ecode != TDB_SUCCESS) {
102                 tdb->ecode = ecode;
103                 return false;
104         }
105
106         if (rec_magic(&rec) != TDB_CHAIN_MAGIC) {
107                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
108                            "tdb_check: Bad hash chain magic %llu",
109                            (long long)rec_magic(&rec));
110                 return false;
111         }
112
113         if (rec_data_length(&rec) != sizeof(struct tdb_chain)) {
114                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
115                            "tdb_check: Bad hash chain length %llu vs %zu",
116                            (long long)rec_data_length(&rec),
117                            sizeof(struct tdb_chain));
118                 return false;
119         }
120         if (rec_key_length(&rec) != 0) {
121                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
122                          "tdb_check: Bad hash chain key length %llu",
123                          (long long)rec_key_length(&rec));
124                 return false;
125         }
126         if (rec_hash(&rec) != 0) {
127                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
128                          "tdb_check: Bad hash chain hash value %llu",
129                          (long long)rec_hash(&rec));
130                 return false;
131         }
132
133         off += sizeof(rec);
134         if (!check_hash_tree(tdb, off, 0, hash, 64,
135                              used, num_used, num_found, check, private_data))
136                 return false;
137
138         off = tdb_read_off(tdb, off + offsetof(struct tdb_chain, next));
139         if (off == TDB_OFF_ERR)
140                 return false;
141         if (off == 0)
142                 return true;
143         (*num_found)++;
144         return check_hash_chain(tdb, off, hash, used, num_used, num_found,
145                                 check, private_data);
146 }
147
148 static bool check_hash_record(struct tdb_context *tdb,
149                               tdb_off_t off,
150                               uint64_t hprefix,
151                               unsigned hprefix_bits,
152                               tdb_off_t used[],
153                               size_t num_used,
154                               size_t *num_found,
155                               int (*check)(TDB_DATA, TDB_DATA, void *),
156                               void *private_data)
157 {
158         struct tdb_used_record rec;
159         enum TDB_ERROR ecode;
160
161         if (hprefix_bits >= 64)
162                 return check_hash_chain(tdb, off, hprefix, used, num_used,
163                                         num_found, check, private_data);
164
165         ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
166         if (ecode != TDB_SUCCESS) {
167                 tdb->ecode = ecode;
168                 return false;
169         }
170
171         if (rec_magic(&rec) != TDB_HTABLE_MAGIC) {
172                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
173                            "tdb_check: Bad hash table magic %llu",
174                            (long long)rec_magic(&rec));
175                 return false;
176         }
177         if (rec_data_length(&rec)
178             != sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) {
179                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
180                            "tdb_check: Bad hash table length %llu vs %llu",
181                            (long long)rec_data_length(&rec),
182                            (long long)sizeof(tdb_off_t)
183                            << TDB_SUBLEVEL_HASH_BITS);
184                 return false;
185         }
186         if (rec_key_length(&rec) != 0) {
187                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
188                          "tdb_check: Bad hash table key length %llu",
189                          (long long)rec_key_length(&rec));
190                 return false;
191         }
192         if (rec_hash(&rec) != 0) {
193                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
194                          "tdb_check: Bad hash table hash value %llu",
195                          (long long)rec_hash(&rec));
196                 return false;
197         }
198
199         off += sizeof(rec);
200         return check_hash_tree(tdb, off,
201                                TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
202                                hprefix, hprefix_bits,
203                                used, num_used, num_found, check, private_data);
204 }
205
206 static int off_cmp(const tdb_off_t *a, const tdb_off_t *b)
207 {
208         /* Can overflow an int. */
209         return *a > *b ? 1
210                 : *a < *b ? -1
211                 : 0;
212 }
213
214 static uint64_t get_bits(uint64_t h, unsigned num, unsigned *used)
215 {
216         *used += num;
217
218         return (h >> (64 - *used)) & ((1U << num) - 1);
219 }
220
221 static bool check_hash_tree(struct tdb_context *tdb,
222                             tdb_off_t off, unsigned int group_bits,
223                             uint64_t hprefix,
224                             unsigned hprefix_bits,
225                             tdb_off_t used[],
226                             size_t num_used,
227                             size_t *num_found,
228                             int (*check)(TDB_DATA, TDB_DATA, void *),
229                             void *private_data)
230 {
231         unsigned int g, b;
232         const tdb_off_t *hash;
233         struct tdb_used_record rec;
234         enum TDB_ERROR ecode;
235
236         hash = tdb_access_read(tdb, off,
237                                sizeof(tdb_off_t)
238                                << (group_bits + TDB_HASH_GROUP_BITS),
239                                true);
240         if (TDB_PTR_IS_ERR(hash)) {
241                 tdb->ecode = TDB_PTR_ERR(hash);
242                 return false;
243         }
244
245         for (g = 0; g < (1 << group_bits); g++) {
246                 const tdb_off_t *group = hash + (g << TDB_HASH_GROUP_BITS);
247                 for (b = 0; b < (1 << TDB_HASH_GROUP_BITS); b++) {
248                         unsigned int bucket, i, used_bits;
249                         uint64_t h;
250                         tdb_off_t *p;
251                         if (group[b] == 0)
252                                 continue;
253
254                         off = group[b] & TDB_OFF_MASK;
255                         p = asearch(&off, used, num_used, off_cmp);
256                         if (!p) {
257                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
258                                            TDB_LOG_ERROR,
259                                            "tdb_check: Invalid offset %llu "
260                                            "in hash", (long long)off);
261                                 goto fail;
262                         }
263                         /* Mark it invalid. */
264                         *p ^= 1;
265                         (*num_found)++;
266
267                         if (hprefix_bits == 64) {
268                                 /* Chained entries are unordered. */
269                                 if (is_subhash(group[b])) {
270                                         tdb_logerr(tdb, TDB_ERR_CORRUPT,
271                                                    TDB_LOG_ERROR,
272                                                    "tdb_check: Invalid chain"
273                                                    " entry subhash");
274                                         goto fail;
275                                 }
276                                 h = hash_record(tdb, off);
277                                 if (h != hprefix) {
278                                         tdb_logerr(tdb, TDB_ERR_CORRUPT,
279                                                    TDB_LOG_ERROR,
280                                                    "check: bad hash chain"
281                                                    " placement"
282                                                    " 0x%llx vs 0x%llx",
283                                                    (long long)h,
284                                                    (long long)hprefix);
285                                         goto fail;
286                                 }
287                                 ecode = tdb_read_convert(tdb, off, &rec,
288                                                          sizeof(rec));
289                                 if (ecode != TDB_SUCCESS) {
290                                         tdb->ecode = ecode;
291                                         goto fail;
292                                 }
293                                 goto check;
294                         }
295
296                         if (is_subhash(group[b])) {
297                                 uint64_t subprefix;
298                                 subprefix = (hprefix
299                                      << (group_bits + TDB_HASH_GROUP_BITS))
300                                         + g * (1 << TDB_HASH_GROUP_BITS) + b;
301
302                                 if (!check_hash_record(tdb,
303                                                group[b] & TDB_OFF_MASK,
304                                                subprefix,
305                                                hprefix_bits
306                                                        + group_bits
307                                                        + TDB_HASH_GROUP_BITS,
308                                                used, num_used, num_found,
309                                                check, private_data))
310                                         goto fail;
311                                 continue;
312                         }
313                         /* A normal entry */
314
315                         /* Does it belong here at all? */
316                         h = hash_record(tdb, off);
317                         used_bits = 0;
318                         if (get_bits(h, hprefix_bits, &used_bits) != hprefix
319                             && hprefix_bits) {
320                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
321                                            TDB_LOG_ERROR,
322                                            "check: bad hash placement"
323                                            " 0x%llx vs 0x%llx",
324                                          (long long)h, (long long)hprefix);
325                                 goto fail;
326                         }
327
328                         /* Does it belong in this group? */
329                         if (get_bits(h, group_bits, &used_bits) != g) {
330                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
331                                            TDB_LOG_ERROR,
332                                            "check: bad group %llu vs %u",
333                                            (long long)h, g);
334                                 goto fail;
335                         }
336
337                         /* Are bucket bits correct? */
338                         bucket = group[b] & TDB_OFF_HASH_GROUP_MASK;
339                         if (get_bits(h, TDB_HASH_GROUP_BITS, &used_bits)
340                             != bucket) {
341                                 used_bits -= TDB_HASH_GROUP_BITS;
342                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
343                                            TDB_LOG_ERROR,
344                                          "check: bad bucket %u vs %u",
345                                          (unsigned)get_bits(h,
346                                                         TDB_HASH_GROUP_BITS,
347                                                         &used_bits),
348                                          bucket);
349                                 goto fail;
350                         }
351
352                         /* There must not be any zero entries between
353                          * the bucket it belongs in and this one! */
354                         for (i = bucket;
355                              i != b;
356                              i = (i + 1) % (1 << TDB_HASH_GROUP_BITS)) {
357                                 if (group[i] == 0) {
358                                         tdb_logerr(tdb, TDB_ERR_CORRUPT,
359                                                    TDB_LOG_ERROR,
360                                                    "check: bad group placement"
361                                                    " %u vs %u",
362                                                    b, bucket);
363                                         goto fail;
364                                 }
365                         }
366
367                         ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
368                         if (ecode != TDB_SUCCESS) {
369                                 tdb->ecode = ecode;
370                                 goto fail;
371                         }
372
373                         /* Bottom bits must match header. */
374                         if ((h & ((1 << 11)-1)) != rec_hash(&rec)) {
375                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
376                                            TDB_LOG_ERROR,
377                                            "tdb_check: Bad hash magic at"
378                                            " offset %llu (0x%llx vs 0x%llx)",
379                                            (long long)off,
380                                            (long long)h,
381                                            (long long)rec_hash(&rec));
382                                 goto fail;
383                         }
384
385                 check:
386                         if (check) {
387                                 TDB_DATA key, data;
388                                 key.dsize = rec_key_length(&rec);
389                                 data.dsize = rec_data_length(&rec);
390                                 key.dptr = (void *)tdb_access_read(tdb,
391                                                    off + sizeof(rec),
392                                                    key.dsize + data.dsize,
393                                                    false);
394                                 if (TDB_PTR_IS_ERR(key.dptr)) {
395                                         tdb->ecode = TDB_PTR_ERR(key.dptr);
396                                         goto fail;
397                                 }
398                                 data.dptr = key.dptr + key.dsize;
399                                 if (check(key, data, private_data) != 0)
400                                         goto fail;
401                                 tdb_access_release(tdb, key.dptr);
402                         }
403                 }
404         }
405         tdb_access_release(tdb, hash);
406         return true;
407
408 fail:
409         tdb_access_release(tdb, hash);
410         return false;
411 }
412
413 static bool check_hash(struct tdb_context *tdb,
414                        tdb_off_t used[],
415                        size_t num_used, size_t num_ftables,
416                        int (*check)(TDB_DATA, TDB_DATA, void *),
417                        void *private_data)
418 {
419         /* Free tables also show up as used. */
420         size_t num_found = num_ftables;
421
422         if (!check_hash_tree(tdb, offsetof(struct tdb_header, hashtable),
423                              TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
424                              0, 0, used, num_used, &num_found,
425                              check, private_data))
426                 return false;
427
428         if (num_found != num_used) {
429                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
430                            "tdb_check: Not all entries are in hash");
431                 return false;
432         }
433         return true;
434 }
435
436 static bool check_free(struct tdb_context *tdb,
437                        tdb_off_t off,
438                        const struct tdb_free_record *frec,
439                        tdb_off_t prev, unsigned int ftable,
440                        unsigned int bucket)
441 {
442         enum TDB_ERROR ecode;
443
444         if (frec_magic(frec) != TDB_FREE_MAGIC) {
445                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
446                            "tdb_check: offset %llu bad magic 0x%llx",
447                            (long long)off, (long long)frec->magic_and_prev);
448                 return false;
449         }
450         if (frec_ftable(frec) != ftable) {
451                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
452                            "tdb_check: offset %llu bad freetable %u",
453                            (long long)off, frec_ftable(frec));
454                 return false;
455         }
456
457         ecode = tdb->methods->oob(tdb, off
458                                   + frec_len(frec)
459                                   + sizeof(struct tdb_used_record),
460                                   false);
461         if (ecode != TDB_SUCCESS) {
462                 tdb->ecode = ecode;
463                 return false;
464         }
465         if (size_to_bucket(frec_len(frec)) != bucket) {
466                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
467                            "tdb_check: offset %llu in wrong bucket %u vs %u",
468                            (long long)off,
469                            bucket, size_to_bucket(frec_len(frec)));
470                 return false;
471         }
472         if (prev != frec_prev(frec)) {
473                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
474                            "tdb_check: offset %llu bad prev %llu vs %llu",
475                            (long long)off,
476                            (long long)prev, (long long)frec_len(frec));
477                 return false;
478         }
479         return true;
480 }
481
482 static bool check_free_table(struct tdb_context *tdb,
483                              tdb_off_t ftable_off,
484                              unsigned ftable_num,
485                              tdb_off_t fr[],
486                              size_t num_free,
487                              size_t *num_found)
488 {
489         struct tdb_freetable ft;
490         tdb_off_t h;
491         unsigned int i;
492         enum TDB_ERROR ecode;
493
494         ecode = tdb_read_convert(tdb, ftable_off, &ft, sizeof(ft));
495         if (ecode != TDB_SUCCESS) {
496                 tdb->ecode = ecode;
497                 return false;
498         }
499
500         if (rec_magic(&ft.hdr) != TDB_FTABLE_MAGIC
501             || rec_key_length(&ft.hdr) != 0
502             || rec_data_length(&ft.hdr) != sizeof(ft) - sizeof(ft.hdr)
503             || rec_hash(&ft.hdr) != 0) {
504                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
505                            "tdb_check: Invalid header on free table");
506                 return false;
507         }
508
509         for (i = 0; i < TDB_FREE_BUCKETS; i++) {
510                 tdb_off_t off, prev = 0, *p;
511                 struct tdb_free_record f;
512
513                 h = bucket_off(ftable_off, i);
514                 for (off = tdb_read_off(tdb, h); off; off = f.next) {
515                         if (off == TDB_OFF_ERR)
516                                 return false;
517                         ecode = tdb_read_convert(tdb, off, &f, sizeof(f));
518                         if (ecode != TDB_SUCCESS) {
519                                 tdb->ecode = ecode;
520                                 return false;
521                         }
522                         if (!check_free(tdb, off, &f, prev, ftable_num, i))
523                                 return false;
524
525                         /* FIXME: Check hash bits */
526                         p = asearch(&off, fr, num_free, off_cmp);
527                         if (!p) {
528                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
529                                            TDB_LOG_ERROR,
530                                            "tdb_check: Invalid offset"
531                                            " %llu in free table",
532                                            (long long)off);
533                                 return false;
534                         }
535                         /* Mark it invalid. */
536                         *p ^= 1;
537                         (*num_found)++;
538                         prev = off;
539                 }
540         }
541         return true;
542 }
543
544 /* Slow, but should be very rare. */
545 size_t dead_space(struct tdb_context *tdb, tdb_off_t off)
546 {
547         size_t len;
548         enum TDB_ERROR ecode;
549
550         for (len = 0; off + len < tdb->map_size; len++) {
551                 char c;
552                 ecode = tdb->methods->tread(tdb, off, &c, 1);
553                 if (ecode != TDB_SUCCESS) {
554                         tdb->ecode = ecode;
555                         return 0;
556                 }
557                 if (c != 0 && c != 0x43)
558                         break;
559         }
560         return len;
561 }
562
563 static bool check_linear(struct tdb_context *tdb,
564                          tdb_off_t **used, size_t *num_used,
565                          tdb_off_t **fr, size_t *num_free,
566                          tdb_off_t recovery)
567 {
568         tdb_off_t off;
569         tdb_len_t len;
570         enum TDB_ERROR ecode;
571         bool found_recovery = false;
572
573         for (off = sizeof(struct tdb_header); off < tdb->map_size; off += len) {
574                 union {
575                         struct tdb_used_record u;
576                         struct tdb_free_record f;
577                         struct tdb_recovery_record r;
578                 } rec;
579                 /* r is larger: only get that if we need to. */
580                 ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.f));
581                 if (ecode != TDB_SUCCESS) {
582                         tdb->ecode = ecode;
583                         return false;
584                 }
585
586                 /* If we crash after ftruncate, we can get zeroes or fill. */
587                 if (rec.r.magic == TDB_RECOVERY_INVALID_MAGIC
588                     || rec.r.magic ==  0x4343434343434343ULL) {
589                         ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.r));
590                         if (ecode != TDB_SUCCESS) {
591                                 tdb->ecode = ecode;
592                                 return false;
593                         }
594                         if (recovery == off) {
595                                 found_recovery = true;
596                                 len = sizeof(rec.r) + rec.r.max_len;
597                         } else {
598                                 len = dead_space(tdb, off);
599                                 if (len < sizeof(rec.r)) {
600                                         tdb_logerr(tdb, TDB_ERR_CORRUPT,
601                                                    TDB_LOG_ERROR,
602                                                    "tdb_check: invalid dead"
603                                                    " space at %zu",
604                                                    (size_t)off);
605                                         return false;
606                                 }
607
608                                 tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
609                                            "Dead space at %zu-%zu (of %zu)",
610                                            (size_t)off, (size_t)(off + len),
611                                            (size_t)tdb->map_size);
612                         }
613                 } else if (rec.r.magic == TDB_RECOVERY_MAGIC) {
614                         ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.r));
615                         if (ecode != TDB_SUCCESS) {
616                                 tdb->ecode = ecode;
617                                 return false;
618                         }
619                         if (recovery != off) {
620                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
621                                            TDB_LOG_ERROR,
622                                            "tdb_check: unexpected recovery"
623                                            " record at offset %zu",
624                                            (size_t)off);
625                                 return false;
626                         }
627                         if (rec.r.len > rec.r.max_len) {
628                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
629                                            TDB_LOG_ERROR,
630                                            "tdb_check: invalid recovery length"
631                                            " %zu", (size_t)rec.r.len);
632                                 return false;
633                         }
634                         if (rec.r.eof > tdb->map_size) {
635                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
636                                            TDB_LOG_ERROR,
637                                            "tdb_check: invalid old EOF"
638                                            " %zu", (size_t)rec.r.eof);
639                                 return false;
640                         }
641                         found_recovery = true;
642                         len = sizeof(rec.r) + rec.r.max_len;
643                 } else if (frec_magic(&rec.f) == TDB_FREE_MAGIC) {
644                         len = sizeof(rec.u) + frec_len(&rec.f);
645                         if (off + len > tdb->map_size) {
646                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
647                                            TDB_LOG_ERROR,
648                                            "tdb_check: free overlength %llu"
649                                            " at offset %llu",
650                                            (long long)len, (long long)off);
651                                 return false;
652                         }
653                         /* This record should be in free lists. */
654                         if (frec_ftable(&rec.f) != TDB_FTABLE_NONE
655                             && !append(fr, num_free, off)) {
656                                 tdb_logerr(tdb, TDB_ERR_OOM,
657                                            TDB_LOG_ERROR,
658                                            "tdb_check: tracking %zu'th"
659                                            " free record.", *num_free);
660                                 return false;
661                         }
662                 } else if (rec_magic(&rec.u) == TDB_USED_MAGIC
663                            || rec_magic(&rec.u) == TDB_CHAIN_MAGIC
664                            || rec_magic(&rec.u) == TDB_HTABLE_MAGIC
665                            || rec_magic(&rec.u) == TDB_FTABLE_MAGIC) {
666                         uint64_t klen, dlen, extra;
667
668                         /* This record is used! */
669                         if (!append(used, num_used, off)) {
670                                 tdb_logerr(tdb, TDB_ERR_OOM,
671                                            TDB_LOG_ERROR,
672                                            "tdb_check: tracking %zu'th"
673                                            " used record.", *num_used);
674                                 return false;
675                         }
676
677                         klen = rec_key_length(&rec.u);
678                         dlen = rec_data_length(&rec.u);
679                         extra = rec_extra_padding(&rec.u);
680
681                         len = sizeof(rec.u) + klen + dlen + extra;
682                         if (off + len > tdb->map_size) {
683                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
684                                            TDB_LOG_ERROR,
685                                            "tdb_check: used overlength %llu"
686                                            " at offset %llu",
687                                            (long long)len, (long long)off);
688                                 return false;
689                         }
690
691                         if (len < sizeof(rec.f)) {
692                                 tdb_logerr(tdb, TDB_ERR_CORRUPT,
693                                            TDB_LOG_ERROR,
694                                            "tdb_check: too short record %llu"
695                                            " at %llu",
696                                            (long long)len, (long long)off);
697                                 return false;
698                         }
699                 } else {
700                         tdb_logerr(tdb, TDB_ERR_CORRUPT,
701                                    TDB_LOG_ERROR,
702                                    "tdb_check: Bad magic 0x%llx at offset %zu",
703                                    (long long)rec_magic(&rec.u), (size_t)off);
704                         return false;
705                 }
706         }
707
708         /* We must have found recovery area if there was one. */
709         if (recovery != 0 && !found_recovery) {
710                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
711                            "tdb_check: expected a recovery area at %zu",
712                            (size_t)recovery);
713                 return false;
714         }
715
716         return true;
717 }
718
719 int tdb_check(struct tdb_context *tdb,
720               int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
721               void *private_data)
722 {
723         tdb_off_t *fr = NULL, *used = NULL, ft, recovery;
724         size_t num_free = 0, num_used = 0, num_found = 0, num_ftables = 0;
725         enum TDB_ERROR ecode;
726
727         ecode = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
728         if (ecpde != TDB_SUCCESS) {
729                 tdb->ecode = ecode;
730                 return -1;
731         }
732
733         ecode = tdb_lock_expand(tdb, F_RDLCK);
734         if (ecode != TDB_SUCCESS) {
735                 tdb->ecode = ecode;
736                 tdb_allrecord_unlock(tdb, F_RDLCK);
737                 return -1;
738         }
739
740         if (!check_header(tdb, &recovery))
741                 goto fail;
742
743         /* First we do a linear scan, checking all records. */
744         if (!check_linear(tdb, &used, &num_used, &fr, &num_free, recovery))
745                 goto fail;
746
747         for (ft = first_ftable(tdb); ft; ft = next_ftable(tdb, ft)) {
748                 if (ft == TDB_OFF_ERR)
749                         goto fail;
750                 if (!check_free_table(tdb, ft, num_ftables, fr, num_free,
751                                       &num_found))
752                         goto fail;
753                 num_ftables++;
754         }
755
756         /* FIXME: Check key uniqueness? */
757         if (!check_hash(tdb, used, num_used, num_ftables, check, private_data))
758                 goto fail;
759
760         if (num_found != num_free) {
761                 tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
762                            "tdb_check: Not all entries are in free table");
763                 return -1;
764         }
765
766         tdb_allrecord_unlock(tdb, F_RDLCK);
767         tdb_unlock_expand(tdb, F_RDLCK);
768         free(fr);
769         free(used);
770         return 0;
771
772 fail:
773         free(fr);
774         free(used);
775         tdb_allrecord_unlock(tdb, F_RDLCK);
776         tdb_unlock_expand(tdb, F_RDLCK);
777         return -1;
778 }