tdb2: add missing prototype, move tdb_firstkey/tdb_nextkey to traverse.c
[ccan] / ccan / tdb2 / check.c
1  /* 
2    Trivial Database 2: free list/block handling
3    Copyright (C) Rusty Russell 2010
4    
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 3 of the License, or (at your option) any later version.
9
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with this library; if not, see <http://www.gnu.org/licenses/>.
17 */
18 #include "private.h"
19 #include <ccan/likely/likely.h>
20 #include <ccan/asearch/asearch.h>
21
22 /* We keep an ordered array of offsets. */
23 static bool append(tdb_off_t **arr, size_t *num, tdb_off_t off)
24 {
25         tdb_off_t *new = realloc(*arr, (*num + 1) * sizeof(tdb_off_t));
26         if (!new)
27                 return false;
28         new[(*num)++] = off;
29         *arr = new;
30         return true;
31 }
32
33 static bool check_header(struct tdb_context *tdb)
34 {
35         uint64_t hash_test;
36         struct tdb_header hdr;
37
38         if (tdb_read_convert(tdb, 0, &hdr, sizeof(hdr)) == -1)
39                 return false;
40         /* magic food should not be converted, so convert back. */
41         tdb_convert(tdb, hdr.magic_food, sizeof(hdr.magic_food));
42
43         hash_test = TDB_HASH_MAGIC;
44         hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test));
45         if (hdr.hash_test != hash_test) {
46                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
47                          "check: hash test %llu should be %llu\n",
48                          hdr.hash_test, hash_test);
49                 return false;
50         }
51
52         if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) {
53                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
54                          "check: bad magic '%.*s'\n",
55                          sizeof(hdr.magic_food), hdr.magic_food);
56                 return false;
57         }
58
59         /* Don't check reserved: they *can* be used later. */
60         return true;
61 }
62
63 static bool check_hash_tree(struct tdb_context *tdb,
64                             tdb_off_t off, unsigned int group_bits,
65                             uint64_t hprefix,
66                             unsigned hprefix_bits,
67                             tdb_off_t used[],
68                             size_t num_used,
69                             size_t *num_found);
70
71 static bool check_hash_record(struct tdb_context *tdb,
72                               tdb_off_t off,
73                               uint64_t hprefix,
74                               unsigned hprefix_bits,
75                               tdb_off_t used[],
76                               size_t num_used,
77                               size_t *num_found)
78 {
79         struct tdb_used_record rec;
80
81         if (tdb_read_convert(tdb, off, &rec, sizeof(rec)) == -1)
82                 return false;
83
84         if (rec_data_length(&rec)
85             != sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) {
86                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
87                          "tdb_check: Bad hash table length %llu vs %llu\n",
88                          (long long)rec_data_length(&rec),
89                          (long long)sizeof(tdb_off_t)<<TDB_SUBLEVEL_HASH_BITS);
90                 return false;
91         }
92         if (rec_key_length(&rec) != 0) {
93                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
94                          "tdb_check: Bad hash table key length %llu\n",
95                          (long long)rec_key_length(&rec));
96                 return false;
97         }
98         if (rec_hash(&rec) != 0) {
99                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
100                          "tdb_check: Bad hash table hash value %llu\n",
101                          (long long)rec_hash(&rec));
102                 return false;
103         }
104
105         off += sizeof(rec);
106         return check_hash_tree(tdb, off,
107                                TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
108                                hprefix, hprefix_bits,
109                                used, num_used, num_found);
110 }
111
112 static int off_cmp(const tdb_off_t *a, const tdb_off_t *b)
113 {
114         /* Can overflow an int. */
115         return *a > *b ? 1
116                 : *a < *b ? -1
117                 : 0;
118 }
119
120 static uint64_t get_bits(uint64_t h, unsigned num, unsigned *used)
121 {
122         *used += num;
123
124         return (h >> (64 - *used)) & ((1U << num) - 1);
125 }
126
127 static bool check_hash_tree(struct tdb_context *tdb,
128                             tdb_off_t off, unsigned int group_bits,
129                             uint64_t hprefix,
130                             unsigned hprefix_bits,
131                             tdb_off_t used[],
132                             size_t num_used,
133                             size_t *num_found)
134 {
135         unsigned int g, b;
136         const tdb_off_t *hash;
137         struct tdb_used_record rec;
138
139         hash = tdb_access_read(tdb, off,
140                                sizeof(tdb_off_t)
141                                << (group_bits + TDB_HASH_GROUP_BITS),
142                                true);
143         if (!hash)
144                 return false;
145
146         for (g = 0; g < (1 << group_bits); g++) {
147                 const tdb_off_t *group = hash + (g << TDB_HASH_GROUP_BITS);
148                 for (b = 0; b < (1 << TDB_HASH_GROUP_BITS); b++) {
149                         unsigned int bucket, i, used_bits;
150                         uint64_t h;
151                         tdb_off_t *p;
152                         if (group[b] == 0)
153                                 continue;
154
155                         off = group[b] & TDB_OFF_MASK;
156                         p = asearch(&off, used, num_used, off_cmp);
157                         if (!p) {
158                                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
159                                          "tdb_check: Invalid offset %llu "
160                                          "in hash\n",
161                                          (long long)off);
162                                 goto fail;
163                         }
164                         /* Mark it invalid. */
165                         *p ^= 1;
166                         (*num_found)++;
167
168                         if (is_subhash(group[b])) {
169                                 uint64_t subprefix;
170                                 subprefix = (hprefix 
171                                      << (group_bits + TDB_HASH_GROUP_BITS))
172                                         + g * (1 << TDB_HASH_GROUP_BITS) + b;
173
174                                 if (!check_hash_record(tdb,
175                                                group[b] & TDB_OFF_MASK,
176                                                subprefix,
177                                                hprefix_bits
178                                                        + group_bits
179                                                        + TDB_HASH_GROUP_BITS,
180                                                used, num_used, num_found))
181                                         goto fail;
182                                 continue;
183                         }
184                         /* A normal entry */
185
186                         /* Does it belong here at all? */
187                         h = hash_record(tdb, off);
188                         used_bits = 0;
189                         if (get_bits(h, hprefix_bits, &used_bits) != hprefix
190                             && hprefix_bits) {
191                                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
192                                          "check: bad hash placement"
193                                          " 0x%llx vs 0x%llx\n",
194                                          (long long)h, (long long)hprefix);
195                                 goto fail;
196                         }
197
198                         /* Does it belong in this group? */
199                         if (get_bits(h, group_bits, &used_bits) != g) {
200                                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
201                                          "check: bad group %llu vs %u\n",
202                                          (long long)h, g);
203                                 goto fail;
204                         }
205
206                         /* Are bucket bits correct? */
207                         bucket = group[b] & TDB_OFF_HASH_GROUP_MASK;
208                         if (get_bits(h, TDB_HASH_GROUP_BITS, &used_bits)
209                             != bucket) {
210                                 used_bits -= TDB_HASH_GROUP_BITS;
211                                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
212                                          "check: bad bucket %u vs %u\n",
213                                          (unsigned)get_bits(h,
214                                                             TDB_HASH_GROUP_BITS,
215                                                             &used_bits),
216                                          bucket);
217                                 goto fail;
218                         }
219
220                         /* There must not be any zero entries between
221                          * the bucket it belongs in and this one! */
222                         for (i = bucket;
223                              i != b;
224                              i = (i + 1) % (1 << TDB_HASH_GROUP_BITS)) {
225                                 if (group[i] == 0) {
226                                         tdb->log(tdb, TDB_DEBUG_ERROR,
227                                                  tdb->log_priv,
228                                                  "check: bad group placement"
229                                                  " %u vs %u\n",
230                                                  b, bucket);
231                                         goto fail;
232                                 }
233                         }
234
235                         if (tdb_read_convert(tdb, off, &rec, sizeof(rec)) == -1)
236                                 goto fail;
237
238                         /* Bottom bits must match header. */
239                         if ((h & ((1 << 5)-1)) != rec_hash(&rec)) {
240                                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
241                                          "tdb_check: Bad hash magic at"
242                                          " offset %llu (0x%llx vs 0x%llx)\n",
243                                          (long long)off,
244                                          (long long)h,
245                                          (long long)rec_hash(&rec));
246                                 goto fail;
247                         }
248                 }
249         }
250         tdb_access_release(tdb, hash);
251         return true;
252
253 fail:
254         tdb_access_release(tdb, hash);
255         return false;
256 }
257
258 static bool check_hash(struct tdb_context *tdb,
259                        tdb_off_t used[],
260                        size_t num_used)
261 {
262         size_t num_found = 0;
263
264         if (!check_hash_tree(tdb, offsetof(struct tdb_header, hashtable),
265                              TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
266                              0, 0,  used, num_used, &num_found))
267                 return false;
268
269         if (num_found != num_used) {
270                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
271                          "tdb_check: Not all entries are in hash\n");
272                 return false;
273         }
274         return true;
275 }
276
277 static bool check_free(struct tdb_context *tdb,
278                        tdb_off_t off,
279                        const struct tdb_free_record *frec,
280                        tdb_off_t prev,
281                        tdb_off_t zone_off, unsigned int bucket)
282 {
283         if (frec_magic(frec) != TDB_FREE_MAGIC) {
284                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
285                          "tdb_check: offset %llu bad magic 0x%llx\n",
286                          (long long)off, (long long)frec->magic_and_meta);
287                 return false;
288         }
289         if (tdb->methods->oob(tdb, off
290                               + frec->data_len-sizeof(struct tdb_used_record),
291                               true))
292                 return false;
293         if (off < zone_off || off >= zone_off + (1ULL<<frec_zone_bits(frec))) {
294                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
295                          "tdb_check: offset %llu outside zone %llu-%llu\n",
296                          (long long)off,
297                          (long long)zone_off,
298                          (long long)zone_off + (1ULL<<frec_zone_bits(frec)));
299                 return false;
300         }
301         if (size_to_bucket(frec_zone_bits(frec), frec->data_len) != bucket) {
302                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
303                          "tdb_check: offset %llu in wrong bucket %u vs %u\n",
304                          (long long)off,
305                          bucket,
306                          size_to_bucket(frec_zone_bits(frec), frec->data_len));
307                 return false;
308         }
309         if (prev != frec->prev) {
310                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
311                          "tdb_check: offset %llu bad prev %llu vs %llu\n",
312                          (long long)off,
313                          (long long)prev, (long long)frec->prev);
314                 return false;
315         }
316         return true;
317 }
318                        
319 static tdb_len_t check_free_list(struct tdb_context *tdb,
320                                  tdb_off_t zone_off,
321                                  tdb_off_t free[],
322                                  size_t num_free,
323                                  size_t *num_found)
324 {
325         struct free_zone_header zhdr;
326         tdb_off_t h;
327         unsigned int i;
328
329         if (tdb_read_convert(tdb, zone_off, &zhdr, sizeof(zhdr)) == -1)
330                 return TDB_OFF_ERR;
331
332         for (i = 0; i <= BUCKETS_FOR_ZONE(zhdr.zone_bits); i++) {
333                 tdb_off_t off, prev = 0, *p;
334                 struct tdb_free_record f;
335
336                 h = bucket_off(zone_off, i);
337                 for (off = tdb_read_off(tdb, h); off; off = f.next) {
338                         if (off == TDB_OFF_ERR)
339                                 return false;
340                         if (tdb_read_convert(tdb, off, &f, sizeof(f)))
341                                 return false;
342                         if (!check_free(tdb, off, &f, prev, zone_off, i))
343                                 return false;
344
345                         /* FIXME: Check hash bits */
346                         p = asearch(&off, free, num_free, off_cmp);
347                         if (!p) {
348                                 tdb->log(tdb, TDB_DEBUG_ERROR,
349                                          tdb->log_priv,
350                                          "tdb_check: Invalid offset"
351                                          " %llu in free table\n",
352                                          (long long)off);
353                                 return false;
354                         }
355                         /* Mark it invalid. */
356                         *p ^= 1;
357                         (*num_found)++;
358                         prev = off;
359                 }
360         }
361         return 1ULL << zhdr.zone_bits;
362 }
363
364 static tdb_off_t check_zone(struct tdb_context *tdb, tdb_off_t zone_off,
365                             tdb_off_t **used, size_t *num_used,
366                             tdb_off_t **free, size_t *num_free,
367                             unsigned int *max_zone_bits)
368 {
369         struct free_zone_header zhdr;
370         tdb_off_t off, hdrlen;
371         tdb_len_t len;
372
373         if (tdb_read_convert(tdb, zone_off, &zhdr, sizeof(zhdr)) == -1)
374                 return TDB_OFF_ERR;
375
376         if (zhdr.zone_bits < INITIAL_ZONE_BITS) {
377                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
378                          "check: bad zone_bits %llu at zone %llu\n",
379                          (long long)zhdr.zone_bits, (long long)zone_off);
380                 return TDB_OFF_ERR;
381         }
382
383         /* Zone bits can only increase... */
384         if (zhdr.zone_bits > *max_zone_bits)
385                 *max_zone_bits = zhdr.zone_bits;
386         else if (zhdr.zone_bits < *max_zone_bits) {
387                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
388                          "check: small zone_bits %llu at zone %llu\n",
389                          (long long)zhdr.zone_bits, (long long)zone_off);
390                 return TDB_OFF_ERR;
391         }
392
393         /* Zone must be within file! */
394         if (tdb->methods->oob(tdb, zone_off + (1ULL << zhdr.zone_bits), false))
395                 return TDB_OFF_ERR;
396
397         hdrlen = sizeof(zhdr)
398                 + (BUCKETS_FOR_ZONE(zhdr.zone_bits) + 1) * sizeof(tdb_off_t);
399         for (off = zone_off + hdrlen;
400              off < zone_off + (1ULL << zhdr.zone_bits);
401              off += len) {
402                 union {
403                         struct tdb_used_record u;
404                         struct tdb_free_record f;
405                 } pad, *p;
406                 p = tdb_get(tdb, off, &pad, sizeof(pad));
407                 if (!p)
408                         return TDB_OFF_ERR;
409                 if (frec_magic(&p->f) == TDB_FREE_MAGIC) {
410                         if (frec_zone_bits(&p->f) != zhdr.zone_bits) {
411                                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
412                                          "tdb_check: Bad free zone bits %u"
413                                          " at offset %llu\n",
414                                          frec_zone_bits(&p->f),
415                                          (long long)off);
416                                 return TDB_OFF_ERR;
417                         }
418                         /* This record is free! */
419                         if (!append(free, num_free, off))
420                                 return TDB_OFF_ERR;
421                         len = sizeof(p->u) + p->f.data_len;
422                         if (off + len > zone_off + (1ULL << zhdr.zone_bits)) {
423                                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
424                                          "tdb_check: free overlength %llu"
425                                          " at offset %llu\n",
426                                          (long long)len, (long long)off);
427                                 return TDB_OFF_ERR;
428                         }
429                 } else {
430                         uint64_t klen, dlen, extra;
431
432                         /* This record is used! */
433                         if (rec_magic(&p->u) != TDB_MAGIC) {
434                                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
435                                          "tdb_check: Bad magic 0x%llx"
436                                          " at offset %llu\n",
437                                          (long long)rec_magic(&p->u),
438                                          (long long)off);
439                                 return TDB_OFF_ERR;
440                         }
441
442                         if (rec_zone_bits(&p->u) != zhdr.zone_bits) {
443                                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
444                                          "tdb_check: Bad zone bits %u"
445                                          " at offset %llu\n",
446                                          rec_zone_bits(&p->u),
447                                          (long long)off);
448                                 return TDB_OFF_ERR;
449                         }
450                         
451                         if (!append(used, num_used, off))
452                                 return TDB_OFF_ERR;
453
454                         klen = rec_key_length(&p->u);
455                         dlen = rec_data_length(&p->u);
456                         extra = rec_extra_padding(&p->u);
457
458                         len = sizeof(p->u) + klen + dlen + extra;
459                         if (off + len > zone_off + (1ULL << zhdr.zone_bits)) {
460                                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
461                                          "tdb_check: used overlength %llu"
462                                          " at offset %llu\n",
463                                          (long long)len, (long long)off);
464                                 return TDB_OFF_ERR;
465                         }
466
467                         if (len < sizeof(p->f)) {
468                                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
469                                          "tdb_check: too short record %llu at"
470                                          " %llu\n",
471                                          (long long)len, (long long)off);
472                                 return TDB_OFF_ERR;
473                         }
474                 }
475         }
476         return 1ULL << zhdr.zone_bits;
477 }
478
479 /* FIXME: call check() function. */
480 int tdb_check(struct tdb_context *tdb,
481               int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
482               void *private_data)
483 {
484         tdb_off_t *free = NULL, *used = NULL, off;
485         tdb_len_t len;
486         size_t num_free = 0, num_used = 0, num_found = 0;
487         unsigned max_zone_bits = INITIAL_ZONE_BITS;
488         uint8_t tailer;
489
490         /* This always ensures the header is uptodate. */
491         if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false) != 0)
492                 return -1;
493
494         if (tdb_lock_expand(tdb, F_RDLCK) != 0) {
495                 tdb_allrecord_unlock(tdb, F_RDLCK);
496                 return -1;
497         }
498
499         if (!check_header(tdb))
500                 goto fail;
501
502         /* First we do a linear scan, checking all records. */
503         for (off = sizeof(struct tdb_header);
504              off < tdb->map_size - 1;
505              off += len) {
506                 len = check_zone(tdb, off, &used, &num_used, &free, &num_free,
507                                  &max_zone_bits);
508                 if (len == TDB_OFF_ERR)
509                         goto fail;
510         }
511
512         /* Check tailer. */
513         if (tdb->methods->read(tdb, tdb->map_size - 1, &tailer, 1) == -1)
514                 goto fail;
515         if (tailer != max_zone_bits) {
516                 tdb->ecode = TDB_ERR_CORRUPT;
517                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
518                          "tdb_check: Bad tailer value %u vs %u\n", tailer,
519                          max_zone_bits);
520                 goto fail;
521         }
522
523         /* FIXME: Check key uniqueness? */
524         if (!check_hash(tdb, used, num_used))
525                 goto fail;
526
527         for (off = sizeof(struct tdb_header);
528              off < tdb->map_size - 1;
529              off += len) {
530                 len = check_free_list(tdb, off, free, num_free, &num_found);
531                 if (len == TDB_OFF_ERR)
532                         goto fail;
533         }
534         if (num_found != num_free) {
535                 tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
536                          "tdb_check: Not all entries are in free table\n");
537                 return false;
538         }
539
540         tdb_allrecord_unlock(tdb, F_RDLCK);
541         tdb_unlock_expand(tdb, F_RDLCK);
542         return 0;
543
544 fail:
545         tdb_allrecord_unlock(tdb, F_RDLCK);
546         tdb_unlock_expand(tdb, F_RDLCK);
547         return -1;
548 }