]> git.ozlabs.org Git - ccan/blob - ccan/tdb2/tdb.c
2b0b77ad43726fe48b614d2f842dcdcbe6c18869
[ccan] / ccan / tdb2 / tdb.c
1 #include "private.h"
2 #include <ccan/tdb2/tdb2.h>
3 #include <assert.h>
4 #include <stdarg.h>
5
/* The null return: a tdb_data with a NULL pointer and zero size.
 * tdb_fetch() in this file returns it on error and when the key is absent. */
struct tdb_data tdb_null = { .dptr = NULL, .dsize = 0 };

/* all contexts, to ensure no double-opens (fcntl locks don't nest!) */
/* Singly linked through ->next: tdb_open() pushes file-backed contexts at
 * the head, tdb_close() unlinks them. */
static struct tdb_context *tdbs = NULL;
11
12 static bool tdb_already_open(dev_t device, ino_t ino)
13 {
14         struct tdb_context *i;
15         
16         for (i = tdbs; i; i = i->next) {
17                 if (i->device == device && i->inode == ino) {
18                         return true;
19                 }
20         }
21
22         return false;
23 }
24
/*
 * Read exactly len bytes from fd into buf, looping over short reads.
 *
 * Returns true once len bytes have been read (trivially true for len == 0).
 * Returns false on a read error (errno as set by read(2)) or on end of file
 * before len bytes arrived (errno is set to EWOULDBLOCK in that case).
 * A read interrupted by a signal (EINTR) is retried rather than reported as
 * a failure.
 */
static bool read_all(int fd, void *buf, size_t len)
{
        char *pos = buf;

        while (len) {
                ssize_t ret = read(fd, pos, len);

                if (ret < 0) {
                        /* Interrupted before any data arrived: try again. */
                        if (errno == EINTR)
                                continue;
                        return false;
                }
                if (ret == 0) {
                        /* ETOOSHORT? */
                        errno = EWOULDBLOCK;
                        return false;
                }
                pos += ret;
                len -= ret;
        }
        return true;
}
42
43 static uint64_t random_number(struct tdb_context *tdb)
44 {
45         int fd;
46         uint64_t ret = 0;
47         struct timeval now;
48
49         fd = open("/dev/urandom", O_RDONLY);
50         if (fd >= 0) {
51                 if (read_all(fd, &ret, sizeof(ret))) {
52                         close(fd);
53                         return ret;
54                 }
55                 close(fd);
56         }
57         /* FIXME: Untested!  Based on Wikipedia protocol description! */
58         fd = open("/dev/egd-pool", O_RDWR);
59         if (fd >= 0) {
60                 /* Command is 1, next byte is size we want to read. */
61                 char cmd[2] = { 1, sizeof(uint64_t) };
62                 if (write(fd, cmd, sizeof(cmd)) == sizeof(cmd)) {
63                         char reply[1 + sizeof(uint64_t)];
64                         int r = read(fd, reply, sizeof(reply));
65                         if (r > 1) {
66                                 /* Copy at least some bytes. */
67                                 memcpy(&ret, reply+1, r - 1);
68                                 if (reply[0] == sizeof(uint64_t)
69                                     && r == sizeof(reply)) {
70                                         close(fd);
71                                         return ret;
72                                 }
73                         }
74                 }
75                 close(fd);
76         }
77
78         /* Fallback: pid and time. */
79         gettimeofday(&now, NULL);
80         ret = getpid() * 100132289ULL + now.tv_sec * 1000000ULL + now.tv_usec;
81         tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
82                    "tdb_open: random from getpid and time");
83         return ret;
84 }
85
86 struct new_database {
87         struct tdb_header hdr;
88         struct tdb_freetable ftable;
89 };
90
91 /* initialise a new database */
92 static int tdb_new_database(struct tdb_context *tdb,
93                             struct tdb_attribute_seed *seed,
94                             struct tdb_header *hdr)
95 {
96         /* We make it up in memory, then write it out if not internal */
97         struct new_database newdb;
98         unsigned int magic_len;
99         ssize_t rlen;
100
101         /* Fill in the header */
102         newdb.hdr.version = TDB_VERSION;
103         if (seed)
104                 newdb.hdr.hash_seed = seed->seed;
105         else
106                 newdb.hdr.hash_seed = random_number(tdb);
107         newdb.hdr.hash_test = TDB_HASH_MAGIC;
108         newdb.hdr.hash_test = tdb->khash(&newdb.hdr.hash_test,
109                                          sizeof(newdb.hdr.hash_test),
110                                          newdb.hdr.hash_seed,
111                                          tdb->hash_priv);
112         newdb.hdr.recovery = 0;
113         memset(newdb.hdr.reserved, 0, sizeof(newdb.hdr.reserved));
114         /* Initial hashes are empty. */
115         memset(newdb.hdr.hashtable, 0, sizeof(newdb.hdr.hashtable));
116
117         /* Free is empty. */
118         newdb.hdr.free_table = offsetof(struct new_database, ftable);
119         memset(&newdb.ftable, 0, sizeof(newdb.ftable));
120         tdb->ecode = set_header(NULL, &newdb.ftable.hdr, TDB_FTABLE_MAGIC, 0,
121                                 sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr),
122                                 sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr),
123                                 0);
124         if (tdb->ecode != TDB_SUCCESS) {
125                 return -1;
126         }
127
128         /* Magic food */
129         memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food));
130         strcpy(newdb.hdr.magic_food, TDB_MAGIC_FOOD);
131
132         /* This creates an endian-converted database, as if read from disk */
133         magic_len = sizeof(newdb.hdr.magic_food);
134         tdb_convert(tdb,
135                     (char *)&newdb.hdr + magic_len, sizeof(newdb) - magic_len);
136
137         *hdr = newdb.hdr;
138
139         if (tdb->flags & TDB_INTERNAL) {
140                 tdb->map_size = sizeof(newdb);
141                 tdb->map_ptr = malloc(tdb->map_size);
142                 if (!tdb->map_ptr) {
143                         tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
144                                    "tdb_new_database: failed to allocate");
145                         return -1;
146                 }
147                 memcpy(tdb->map_ptr, &newdb, tdb->map_size);
148                 return 0;
149         }
150         if (lseek(tdb->fd, 0, SEEK_SET) == -1)
151                 return -1;
152
153         if (ftruncate(tdb->fd, 0) == -1)
154                 return -1;
155
156         rlen = write(tdb->fd, &newdb, sizeof(newdb));
157         if (rlen != sizeof(newdb)) {
158                 if (rlen >= 0)
159                         errno = ENOSPC;
160                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
161                            "tdb_new_database: %zi writing header: %s",
162                            rlen, strerror(errno));
163                 return -1;
164         }
165         return 0;
166 }
167
168 struct tdb_context *tdb_open(const char *name, int tdb_flags,
169                              int open_flags, mode_t mode,
170                              union tdb_attribute *attr)
171 {
172         struct tdb_context *tdb;
173         struct stat st;
174         int saved_errno = 0;
175         uint64_t hash_test;
176         unsigned v;
177         ssize_t rlen;
178         struct tdb_header hdr;
179         struct tdb_attribute_seed *seed = NULL;
180         enum TDB_ERROR ecode;
181
182         tdb = malloc(sizeof(*tdb));
183         if (!tdb) {
184                 /* Can't log this */
185                 errno = ENOMEM;
186                 return NULL;
187         }
188         tdb->name = NULL;
189         tdb->map_ptr = NULL;
190         tdb->direct_access = 0;
191         tdb->fd = -1;
192         tdb->map_size = sizeof(struct tdb_header);
193         tdb->ecode = TDB_SUCCESS;
194         tdb->flags = tdb_flags;
195         tdb->logfn = NULL;
196         tdb->transaction = NULL;
197         tdb->stats = NULL;
198         tdb->access = NULL;
199         tdb_hash_init(tdb);
200         tdb_io_init(tdb);
201         tdb_lock_init(tdb);
202
203         while (attr) {
204                 switch (attr->base.attr) {
205                 case TDB_ATTRIBUTE_LOG:
206                         tdb->logfn = attr->log.log_fn;
207                         tdb->log_private = attr->log.log_private;
208                         break;
209                 case TDB_ATTRIBUTE_HASH:
210                         tdb->khash = attr->hash.hash_fn;
211                         tdb->hash_priv = attr->hash.hash_private;
212                         break;
213                 case TDB_ATTRIBUTE_SEED:
214                         seed = &attr->seed;
215                         break;
216                 case TDB_ATTRIBUTE_STATS:
217                         tdb->stats = &attr->stats;
218                         /* They have stats we don't know about?  Tell them. */
219                         if (tdb->stats->size > sizeof(attr->stats))
220                                 tdb->stats->size = sizeof(attr->stats);
221                         break;
222                 default:
223                         tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
224                                    "tdb_open: unknown attribute type %u",
225                                    attr->base.attr);
226                         goto fail;
227                 }
228                 attr = attr->base.next;
229         }
230
231         if ((open_flags & O_ACCMODE) == O_WRONLY) {
232                 tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
233                            "tdb_open: can't open tdb %s write-only", name);
234                 goto fail;
235         }
236
237         if ((open_flags & O_ACCMODE) == O_RDONLY) {
238                 tdb->read_only = true;
239                 /* read only databases don't do locking */
240                 tdb->flags |= TDB_NOLOCK;
241                 tdb->mmap_flags = PROT_READ;
242         } else {
243                 tdb->read_only = false;
244                 tdb->mmap_flags = PROT_READ | PROT_WRITE;
245         }
246
247         /* internal databases don't need any of the rest. */
248         if (tdb->flags & TDB_INTERNAL) {
249                 tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP);
250                 if (tdb_new_database(tdb, seed, &hdr) != 0) {
251                         goto fail;
252                 }
253                 tdb_convert(tdb, &hdr.hash_seed, sizeof(hdr.hash_seed));
254                 tdb->hash_seed = hdr.hash_seed;
255                 tdb_ftable_init(tdb);
256                 return tdb;
257         }
258
259         if ((tdb->fd = open(name, open_flags, mode)) == -1) {
260                 /* errno set by open(2) */
261                 saved_errno = errno;
262                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
263                            "tdb_open: could not open file %s: %s",
264                            name, strerror(errno));
265                 goto fail;
266         }
267
268         /* on exec, don't inherit the fd */
269         v = fcntl(tdb->fd, F_GETFD, 0);
270         fcntl(tdb->fd, F_SETFD, v | FD_CLOEXEC);
271
272         /* ensure there is only one process initialising at once */
273         tdb->ecode = tdb_lock_open(tdb, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
274         if (tdb->ecode != TDB_SUCCESS) {
275                 goto fail;
276         }
277
278         /* If they used O_TRUNC, read will return 0. */
279         rlen = read(tdb->fd, &hdr, sizeof(hdr));
280         if (rlen == 0 && (open_flags & O_CREAT)) {
281                 if (tdb_new_database(tdb, seed, &hdr) == -1) {
282                         goto fail;
283                 }
284         } else if (rlen < 0) {
285                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
286                            "tdb_open: error %s reading %s",
287                            strerror(errno), name);
288                 goto fail;
289         } else if (rlen < sizeof(hdr)
290                    || strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) {
291                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
292                            "tdb_open: %s is not a tdb file", name);
293                 goto fail;
294         }
295
296         if (hdr.version != TDB_VERSION) {
297                 if (hdr.version == bswap_64(TDB_VERSION))
298                         tdb->flags |= TDB_CONVERT;
299                 else {
300                         /* wrong version */
301                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
302                                    "tdb_open: %s is unknown version 0x%llx",
303                                    name, (long long)hdr.version);
304                         goto fail;
305                 }
306         }
307
308         tdb_convert(tdb, &hdr, sizeof(hdr));
309         tdb->hash_seed = hdr.hash_seed;
310         hash_test = TDB_HASH_MAGIC;
311         hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test));
312         if (hdr.hash_test != hash_test) {
313                 /* wrong hash variant */
314                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
315                            "tdb_open: %s uses a different hash function",
316                            name);
317                 goto fail;
318         }
319
320         if (fstat(tdb->fd, &st) == -1) {
321                 saved_errno = errno;
322                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
323                            "tdb_open: could not stat open %s: %s",
324                            name, strerror(errno));
325                 goto fail;
326         }
327
328         /* Is it already in the open list?  If so, fail. */
329         if (tdb_already_open(st.st_dev, st.st_ino)) {
330                 /* FIXME */
331                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_USE_ERROR,
332                            "tdb_open: %s (%d,%d) is already open in this"
333                            " process",
334                            name, (int)st.st_dev, (int)st.st_ino);
335                 goto fail;
336         }
337
338         tdb->name = strdup(name);
339         if (!tdb->name) {
340                 tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
341                            "tdb_open: failed to allocate name");
342                 goto fail;
343         }
344
345         tdb->device = st.st_dev;
346         tdb->inode = st.st_ino;
347         tdb_unlock_open(tdb);
348
349         /* This make sure we have current map_size and mmap. */
350         tdb->methods->oob(tdb, tdb->map_size + 1, true);
351
352         /* Now it's fully formed, recover if necessary. */
353         if (tdb_needs_recovery(tdb)) {
354                 ecode = tdb_lock_and_recover(tdb);
355                 if (ecode != TDB_SUCCESS) {
356                         tdb->ecode = ecode;
357                         goto fail;
358                 }
359         }
360
361         tdb->ecode = tdb_ftable_init(tdb);
362         if (tdb->ecode != TDB_SUCCESS) {
363                 goto fail;
364         }
365
366         tdb->next = tdbs;
367         tdbs = tdb;
368         return tdb;
369
370  fail:
371         /* Map ecode to some logical errno. */
372         if (!saved_errno) {
373                 switch (tdb->ecode) {
374                 case TDB_ERR_CORRUPT:
375                 case TDB_ERR_IO:
376                         saved_errno = EIO;
377                         break;
378                 case TDB_ERR_LOCK:
379                         saved_errno = EWOULDBLOCK;
380                         break;
381                 case TDB_ERR_OOM:
382                         saved_errno = ENOMEM;
383                         break;
384                 case TDB_ERR_EINVAL:
385                         saved_errno = EINVAL;
386                         break;
387                 default:
388                         saved_errno = EINVAL;
389                         break;
390                 }
391         }
392
393 #ifdef TDB_TRACE
394         close(tdb->tracefd);
395 #endif
396         if (tdb->map_ptr) {
397                 if (tdb->flags & TDB_INTERNAL) {
398                         free(tdb->map_ptr);
399                 } else
400                         tdb_munmap(tdb);
401         }
402         free(tdb->lockrecs);
403         free((char *)tdb->name);
404         if (tdb->fd != -1)
405                 if (close(tdb->fd) != 0)
406                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
407                                    "tdb_open: failed to close tdb->fd"
408                                    " on error: %s", strerror(errno));
409         free(tdb);
410         errno = saved_errno;
411         return NULL;
412 }
413
414 static int update_rec_hdr(struct tdb_context *tdb,
415                           tdb_off_t off,
416                           tdb_len_t keylen,
417                           tdb_len_t datalen,
418                           struct tdb_used_record *rec,
419                           uint64_t h)
420 {
421         uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec);
422         enum TDB_ERROR ecode;
423
424         ecode = set_header(tdb, rec, TDB_USED_MAGIC, keylen, datalen,
425                            keylen + dataroom, h);
426         if (ecode != TDB_SUCCESS) {
427                 tdb->ecode = ecode;
428                 return -1;
429         }
430
431         ecode = tdb_write_convert(tdb, off, rec, sizeof(*rec));
432         if (ecode != TDB_SUCCESS) {
433                 tdb->ecode = ecode;
434                 return -1;
435         }
436         return 0;
437 }
438
439 /* Returns -1 on error, 0 on OK */
440 static int replace_data(struct tdb_context *tdb,
441                         struct hash_info *h,
442                         struct tdb_data key, struct tdb_data dbuf,
443                         tdb_off_t old_off, tdb_len_t old_room,
444                         bool growing)
445 {
446         tdb_off_t new_off;
447         enum TDB_ERROR ecode;
448
449         /* Allocate a new record. */
450         new_off = alloc(tdb, key.dsize, dbuf.dsize, h->h, TDB_USED_MAGIC,
451                         growing);
452         if (TDB_OFF_IS_ERR(new_off)) {
453                 tdb->ecode = new_off;
454                 return -1;
455         }
456
457         /* We didn't like the existing one: remove it. */
458         if (old_off) {
459                 add_stat(tdb, frees, 1);
460                 ecode = add_free_record(tdb, old_off,
461                                         sizeof(struct tdb_used_record)
462                                         + key.dsize + old_room);
463                 if (ecode == TDB_SUCCESS)
464                         ecode = replace_in_hash(tdb, h, new_off);
465         } else {
466                 ecode = add_to_hash(tdb, h, new_off);
467         }
468         if (ecode != TDB_SUCCESS) {
469                 tdb->ecode = ecode;
470                 return -1;
471         }
472
473         new_off += sizeof(struct tdb_used_record);
474         ecode = tdb->methods->twrite(tdb, new_off, key.dptr, key.dsize);
475         if (ecode != TDB_SUCCESS) {
476                 tdb->ecode = ecode;
477                 return -1;
478         }
479
480         new_off += key.dsize;
481         ecode = tdb->methods->twrite(tdb, new_off, dbuf.dptr, dbuf.dsize);
482         if (ecode != TDB_SUCCESS) {
483                 tdb->ecode = ecode;
484                 return -1;
485         }
486
487         /* FIXME: tdb_increment_seqnum(tdb); */
488         return 0;
489 }
490
491 int tdb_store(struct tdb_context *tdb,
492               struct tdb_data key, struct tdb_data dbuf, int flag)
493 {
494         struct hash_info h;
495         tdb_off_t off;
496         tdb_len_t old_room = 0;
497         struct tdb_used_record rec;
498         int ret;
499         enum TDB_ERROR ecode;
500
501         off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
502         if (TDB_OFF_IS_ERR(off)) {
503                 tdb->ecode = off;
504                 return -1;
505         }
506
507         /* Now we have lock on this hash bucket. */
508         if (flag == TDB_INSERT) {
509                 if (off) {
510                         tdb->ecode = TDB_ERR_EXISTS;
511                         goto fail;
512                 }
513         } else {
514                 if (off) {
515                         old_room = rec_data_length(&rec)
516                                 + rec_extra_padding(&rec);
517                         if (old_room >= dbuf.dsize) {
518                                 /* Can modify in-place.  Easy! */
519                                 if (update_rec_hdr(tdb, off,
520                                                    key.dsize, dbuf.dsize,
521                                                    &rec, h.h))
522                                         goto fail;
523                                 ecode = tdb->methods->twrite(tdb,
524                                                              off + sizeof(rec)
525                                                              + key.dsize,
526                                                              dbuf.dptr,
527                                                              dbuf.dsize);
528                                 if (ecode != TDB_SUCCESS) {
529                                         tdb->ecode = ecode;
530                                         goto fail;
531                                 }
532                                 tdb_unlock_hashes(tdb, h.hlock_start,
533                                                   h.hlock_range, F_WRLCK);
534                                 return 0;
535                         }
536                 } else {
537                         if (flag == TDB_MODIFY) {
538                                 /* if the record doesn't exist and we
539                                    are in TDB_MODIFY mode then we should fail
540                                    the store */
541                                 tdb->ecode = TDB_ERR_NOEXIST;
542                                 goto fail;
543                         }
544                 }
545         }
546
547         /* If we didn't use the old record, this implies we're growing. */
548         ret = replace_data(tdb, &h, key, dbuf, off, old_room, off != 0);
549         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
550         return ret;
551
552 fail:
553         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
554         return -1;
555 }
556
557 int tdb_append(struct tdb_context *tdb,
558                struct tdb_data key, struct tdb_data dbuf)
559 {
560         struct hash_info h;
561         tdb_off_t off;
562         struct tdb_used_record rec;
563         tdb_len_t old_room = 0, old_dlen;
564         unsigned char *newdata;
565         struct tdb_data new_dbuf;
566         enum TDB_ERROR ecode;
567         int ret;
568
569         off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
570         if (TDB_OFF_IS_ERR(off)) {
571                 tdb->ecode = off;
572                 return -1;
573         }
574
575         if (off) {
576                 old_dlen = rec_data_length(&rec);
577                 old_room = old_dlen + rec_extra_padding(&rec);
578
579                 /* Fast path: can append in place. */
580                 if (rec_extra_padding(&rec) >= dbuf.dsize) {
581                         if (update_rec_hdr(tdb, off, key.dsize,
582                                            old_dlen + dbuf.dsize, &rec, h.h))
583                                 goto fail;
584
585                         off += sizeof(rec) + key.dsize + old_dlen;
586                         ecode = tdb->methods->twrite(tdb, off, dbuf.dptr,
587                                                      dbuf.dsize);
588                         if (ecode != TDB_SUCCESS) {
589                                 tdb->ecode = ecode;
590                                 goto fail;
591                         }
592
593                         /* FIXME: tdb_increment_seqnum(tdb); */
594                         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range,
595                                           F_WRLCK);
596                         return 0;
597                 }
598
599                 /* Slow path. */
600                 newdata = malloc(key.dsize + old_dlen + dbuf.dsize);
601                 if (!newdata) {
602                         tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
603                                    "tdb_append: failed to allocate %zu bytes",
604                                    (size_t)(key.dsize+old_dlen+dbuf.dsize));
605                         goto fail;
606                 }
607                 ecode = tdb->methods->tread(tdb, off + sizeof(rec) + key.dsize,
608                                             newdata, old_dlen);
609                 if (ecode != TDB_SUCCESS) {
610                         tdb->ecode = ecode;
611                         free(newdata);
612                         goto fail;
613                 }
614                 memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize);
615                 new_dbuf.dptr = newdata;
616                 new_dbuf.dsize = old_dlen + dbuf.dsize;
617         } else {
618                 newdata = NULL;
619                 new_dbuf = dbuf;
620         }
621
622         /* If they're using tdb_append(), it implies they're growing record. */
623         ret = replace_data(tdb, &h, key, new_dbuf, off, old_room, true);
624         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
625         free(newdata);
626
627         return ret;
628
629 fail:
630         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
631         return -1;
632 }
633
634 struct tdb_data tdb_fetch(struct tdb_context *tdb, struct tdb_data key)
635 {
636         tdb_off_t off;
637         struct tdb_used_record rec;
638         struct hash_info h;
639         struct tdb_data ret;
640
641         off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
642         if (TDB_OFF_IS_ERR(off)) {
643                 tdb->ecode = off;
644                 return tdb_null;
645         }
646
647         if (!off) {
648                 tdb->ecode = TDB_ERR_NOEXIST;
649                 ret = tdb_null;
650         } else {
651                 ret.dsize = rec_data_length(&rec);
652                 ret.dptr = tdb_alloc_read(tdb, off + sizeof(rec) + key.dsize,
653                                           ret.dsize);
654                 if (TDB_PTR_IS_ERR(ret.dptr)) {
655                         tdb->ecode = TDB_PTR_ERR(ret.dptr);
656                         ret = tdb_null;
657                 }
658         }
659
660         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
661         return ret;
662 }
663
664 int tdb_delete(struct tdb_context *tdb, struct tdb_data key)
665 {
666         tdb_off_t off;
667         struct tdb_used_record rec;
668         struct hash_info h;
669         enum TDB_ERROR ecode;
670
671         off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
672         if (TDB_OFF_IS_ERR(off)) {
673                 tdb->ecode = off;
674                 return -1;
675         }
676
677         if (!off) {
678                 tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
679                 tdb->ecode = TDB_ERR_NOEXIST;
680                 return -1;
681         }
682
683         ecode = delete_from_hash(tdb, &h);
684         if (ecode != TDB_SUCCESS) {
685                 tdb->ecode = ecode;
686                 goto unlock_err;
687         }
688
689         /* Free the deleted entry. */
690         add_stat(tdb, frees, 1);
691         ecode = add_free_record(tdb, off,
692                                 sizeof(struct tdb_used_record)
693                                 + rec_key_length(&rec)
694                                 + rec_data_length(&rec)
695                                 + rec_extra_padding(&rec));
696         if (ecode != TDB_SUCCESS) {
697                 tdb->ecode = ecode;
698                 goto unlock_err;
699         }
700
701         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
702         return 0;
703
704 unlock_err:
705         tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
706         return -1;
707 }
708
709 int tdb_close(struct tdb_context *tdb)
710 {
711         struct tdb_context **i;
712         int ret = 0;
713
714         tdb_trace(tdb, "tdb_close");
715
716         if (tdb->transaction) {
717                 tdb_transaction_cancel(tdb);
718         }
719
720         if (tdb->map_ptr) {
721                 if (tdb->flags & TDB_INTERNAL)
722                         free(tdb->map_ptr);
723                 else
724                         tdb_munmap(tdb);
725         }
726         free((char *)tdb->name);
727         if (tdb->fd != -1) {
728                 ret = close(tdb->fd);
729                 tdb->fd = -1;
730         }
731         free(tdb->lockrecs);
732
733         /* Remove from contexts list */
734         for (i = &tdbs; *i; i = &(*i)->next) {
735                 if (*i == tdb) {
736                         *i = tdb->next;
737                         break;
738                 }
739         }
740
741 #ifdef TDB_TRACE
742         close(tdb->tracefd);
743 #endif
744         free(tdb);
745
746         return ret;
747 }
748
749 enum TDB_ERROR tdb_error(const struct tdb_context *tdb)
750 {
751         return tdb->ecode;
752 }
753
754 const char *tdb_errorstr(const struct tdb_context *tdb)
755 {
756         /* Gcc warns if you miss a case in the switch, so use that. */
757         switch (tdb->ecode) {
758         case TDB_SUCCESS: return "Success";
759         case TDB_ERR_CORRUPT: return "Corrupt database";
760         case TDB_ERR_IO: return "IO Error";
761         case TDB_ERR_LOCK: return "Locking error";
762         case TDB_ERR_OOM: return "Out of memory";
763         case TDB_ERR_EXISTS: return "Record exists";
764         case TDB_ERR_EINVAL: return "Invalid parameter";
765         case TDB_ERR_NOEXIST: return "Record does not exist";
766         case TDB_ERR_RDONLY: return "write not permitted";
767         }
768         return "Invalid error code";
769 }
770
771 enum TDB_ERROR COLD tdb_logerr(struct tdb_context *tdb,
772                                enum TDB_ERROR ecode,
773                                enum tdb_log_level level,
774                                const char *fmt, ...)
775 {
776         char *message;
777         va_list ap;
778         size_t len;
779         /* tdb_open paths care about errno, so save it. */
780         int saved_errno = errno;
781
782         tdb->ecode = ecode;
783
784         if (!tdb->logfn)
785                 return ecode;
786
787         /* FIXME: Doesn't assume asprintf. */
788         va_start(ap, fmt);
789         len = vsnprintf(NULL, 0, fmt, ap);
790         va_end(ap);
791
792         message = malloc(len + 1);
793         if (!message) {
794                 tdb->logfn(tdb, TDB_LOG_ERROR, tdb->log_private,
795                            "out of memory formatting message:");
796                 tdb->logfn(tdb, level, tdb->log_private, fmt);
797                 return ecode;
798         }
799         va_start(ap, fmt);
800         len = vsprintf(message, fmt, ap);
801         va_end(ap);
802         tdb->logfn(tdb, level, tdb->log_private, message);
803         free(message);
804         errno = saved_errno;
805         return ecode;
806 }