]> git.ozlabs.org Git - ccan/blob - ccan/tdb2/open.c
configurator: HAVE_SECTION_START_STOP
[ccan] / ccan / tdb2 / open.c
1  /*
2    Trivial Database 2: opening and closing TDBs
3    Copyright (C) Rusty Russell 2010
4
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 3 of the License, or (at your option) any later version.
9
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with this library; if not, see <http://www.gnu.org/licenses/>.
17 */
18 #include "private.h"
19 #include <ccan/build_assert/build_assert.h>
20 #include <assert.h>
21
/*
 * Head of the list of all open tdb contexts, linked through ->next.
 * It is searched on open to detect a file that is already open
 * (fcntl file locks don't nest!).
 */
static struct tdb_context *tdbs;
24
25 static struct tdb_file *find_file(dev_t device, ino_t ino)
26 {
27         struct tdb_context *i;
28
29         for (i = tdbs; i; i = i->next) {
30                 if (i->file->device == device && i->file->inode == ino) {
31                         i->file->refcnt++;
32                         return i->file;
33                 }
34         }
35         return NULL;
36 }
37
/*
 * Read exactly len bytes from fd into buf, looping over short reads.
 *
 * Returns true once the whole buffer has been filled (trivially so when
 * len is 0).  Returns false with errno left as set by read(2) on error,
 * or with errno set to EWOULDBLOCK if end of file is reached before len
 * bytes were read.  A read interrupted by a signal (EINTR) is retried
 * instead of being reported as a failure.
 */
static bool read_all(int fd, void *buf, size_t len)
{
	char *p = buf;

	while (len) {
		ssize_t ret = read(fd, p, len);

		if (ret < 0) {
			/* Interrupted before any data arrived: try again. */
			if (errno == EINTR)
				continue;
			return false;
		}
		if (ret == 0) {
			/* ETOOSHORT? */
			errno = EWOULDBLOCK;
			return false;
		}
		p += ret;
		len -= ret;
	}
	return true;
}
55
56 static uint64_t random_number(struct tdb_context *tdb)
57 {
58         int fd;
59         uint64_t ret = 0;
60         struct timeval now;
61
62         fd = open("/dev/urandom", O_RDONLY);
63         if (fd >= 0) {
64                 if (read_all(fd, &ret, sizeof(ret))) {
65                         close(fd);
66                         return ret;
67                 }
68                 close(fd);
69         }
70         /* FIXME: Untested!  Based on Wikipedia protocol description! */
71         fd = open("/dev/egd-pool", O_RDWR);
72         if (fd >= 0) {
73                 /* Command is 1, next byte is size we want to read. */
74                 char cmd[2] = { 1, sizeof(uint64_t) };
75                 if (write(fd, cmd, sizeof(cmd)) == sizeof(cmd)) {
76                         char reply[1 + sizeof(uint64_t)];
77                         int r = read(fd, reply, sizeof(reply));
78                         if (r > 1) {
79                                 /* Copy at least some bytes. */
80                                 memcpy(&ret, reply+1, r - 1);
81                                 if (reply[0] == sizeof(uint64_t)
82                                     && r == sizeof(reply)) {
83                                         close(fd);
84                                         return ret;
85                                 }
86                         }
87                 }
88                 close(fd);
89         }
90
91         /* Fallback: pid and time. */
92         gettimeofday(&now, NULL);
93         ret = getpid() * 100132289ULL + now.tv_sec * 1000000ULL + now.tv_usec;
94         tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
95                    "tdb_open: random from getpid and time");
96         return ret;
97 }
98
99 static void tdb2_context_init(struct tdb_context *tdb)
100 {
101         /* Initialize the TDB2 fields here */
102         tdb_io_init(tdb);
103         tdb->tdb2.direct_access = 0;
104         tdb->tdb2.transaction = NULL;
105         tdb->tdb2.access = NULL;
106 }
107
108 struct new_database {
109         struct tdb_header hdr;
110         struct tdb_freetable ftable;
111 };
112
113 /* initialise a new database */
114 static enum TDB_ERROR tdb_new_database(struct tdb_context *tdb,
115                                        struct tdb_attribute_seed *seed,
116                                        struct tdb_header *hdr)
117 {
118         /* We make it up in memory, then write it out if not internal */
119         struct new_database newdb;
120         unsigned int magic_len;
121         ssize_t rlen;
122         enum TDB_ERROR ecode;
123
124         /* Fill in the header */
125         newdb.hdr.version = TDB_VERSION;
126         if (seed)
127                 newdb.hdr.hash_seed = seed->seed;
128         else
129                 newdb.hdr.hash_seed = random_number(tdb);
130         newdb.hdr.hash_test = TDB_HASH_MAGIC;
131         newdb.hdr.hash_test = tdb->hash_fn(&newdb.hdr.hash_test,
132                                            sizeof(newdb.hdr.hash_test),
133                                            newdb.hdr.hash_seed,
134                                            tdb->hash_data);
135         newdb.hdr.recovery = 0;
136         newdb.hdr.features_used = newdb.hdr.features_offered = TDB_FEATURE_MASK;
137         newdb.hdr.seqnum = 0;
138         newdb.hdr.capabilities = 0;
139         memset(newdb.hdr.reserved, 0, sizeof(newdb.hdr.reserved));
140         /* Initial hashes are empty. */
141         memset(newdb.hdr.hashtable, 0, sizeof(newdb.hdr.hashtable));
142
143         /* Free is empty. */
144         newdb.hdr.free_table = offsetof(struct new_database, ftable);
145         memset(&newdb.ftable, 0, sizeof(newdb.ftable));
146         ecode = set_header(NULL, &newdb.ftable.hdr, TDB_FTABLE_MAGIC, 0,
147                            sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr),
148                            sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr),
149                            0);
150         if (ecode != TDB_SUCCESS) {
151                 return ecode;
152         }
153
154         /* Magic food */
155         memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food));
156         strcpy(newdb.hdr.magic_food, TDB_MAGIC_FOOD);
157
158         /* This creates an endian-converted database, as if read from disk */
159         magic_len = sizeof(newdb.hdr.magic_food);
160         tdb_convert(tdb,
161                     (char *)&newdb.hdr + magic_len, sizeof(newdb) - magic_len);
162
163         *hdr = newdb.hdr;
164
165         if (tdb->flags & TDB_INTERNAL) {
166                 tdb->file->map_size = sizeof(newdb);
167                 tdb->file->map_ptr = malloc(tdb->file->map_size);
168                 if (!tdb->file->map_ptr) {
169                         return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
170                                           "tdb_new_database:"
171                                           " failed to allocate");
172                 }
173                 memcpy(tdb->file->map_ptr, &newdb, tdb->file->map_size);
174                 return TDB_SUCCESS;
175         }
176         if (lseek(tdb->file->fd, 0, SEEK_SET) == -1) {
177                 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
178                                   "tdb_new_database:"
179                                   " failed to seek: %s", strerror(errno));
180         }
181
182         if (ftruncate(tdb->file->fd, 0) == -1) {
183                 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
184                                   "tdb_new_database:"
185                                   " failed to truncate: %s", strerror(errno));
186         }
187
188         rlen = write(tdb->file->fd, &newdb, sizeof(newdb));
189         if (rlen != sizeof(newdb)) {
190                 if (rlen >= 0)
191                         errno = ENOSPC;
192                 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
193                                   "tdb_new_database: %zi writing header: %s",
194                                   rlen, strerror(errno));
195         }
196         return TDB_SUCCESS;
197 }
198
199 static enum TDB_ERROR tdb_new_file(struct tdb_context *tdb)
200 {
201         tdb->file = malloc(sizeof(*tdb->file));
202         if (!tdb->file)
203                 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
204                                   "tdb_open: cannot alloc tdb_file structure");
205         tdb->file->num_lockrecs = 0;
206         tdb->file->lockrecs = NULL;
207         tdb->file->allrecord_lock.count = 0;
208         tdb->file->refcnt = 1;
209         tdb->file->map_ptr = NULL;
210         return TDB_SUCCESS;
211 }
212
213 enum TDB_ERROR tdb_set_attribute(struct tdb_context *tdb,
214                                  const union tdb_attribute *attr)
215 {
216         switch (attr->base.attr) {
217         case TDB_ATTRIBUTE_LOG:
218                 tdb->log_fn = attr->log.fn;
219                 tdb->log_data = attr->log.data;
220                 break;
221         case TDB_ATTRIBUTE_HASH:
222         case TDB_ATTRIBUTE_SEED:
223         case TDB_ATTRIBUTE_OPENHOOK:
224         case TDB_ATTRIBUTE_TDB1_HASHSIZE:
225                 return tdb->last_error
226                         = tdb_logerr(tdb, TDB_ERR_EINVAL,
227                                      TDB_LOG_USE_ERROR,
228                                      "tdb_set_attribute:"
229                                      " cannot set %s after opening",
230                                      attr->base.attr == TDB_ATTRIBUTE_HASH
231                                      ? "TDB_ATTRIBUTE_HASH"
232                                      : attr->base.attr == TDB_ATTRIBUTE_SEED
233                                      ? "TDB_ATTRIBUTE_SEED"
234                                      : attr->base.attr == TDB_ATTRIBUTE_OPENHOOK
235                                      ? "TDB_ATTRIBUTE_OPENHOOK"
236                                      : "TDB_ATTRIBUTE_TDB1_HASHSIZE");
237         case TDB_ATTRIBUTE_STATS:
238                 return tdb->last_error
239                         = tdb_logerr(tdb, TDB_ERR_EINVAL,
240                                      TDB_LOG_USE_ERROR,
241                                      "tdb_set_attribute:"
242                                      " cannot set TDB_ATTRIBUTE_STATS");
243         case TDB_ATTRIBUTE_FLOCK:
244                 tdb->lock_fn = attr->flock.lock;
245                 tdb->unlock_fn = attr->flock.unlock;
246                 tdb->lock_data = attr->flock.data;
247                 break;
248         default:
249                 return tdb->last_error
250                         = tdb_logerr(tdb, TDB_ERR_EINVAL,
251                                      TDB_LOG_USE_ERROR,
252                                      "tdb_set_attribute:"
253                                      " unknown attribute type %u",
254                                      attr->base.attr);
255         }
256         return TDB_SUCCESS;
257 }
258
259 enum TDB_ERROR tdb_get_attribute(struct tdb_context *tdb,
260                                  union tdb_attribute *attr)
261 {
262         switch (attr->base.attr) {
263         case TDB_ATTRIBUTE_LOG:
264                 if (!tdb->log_fn)
265                         return tdb->last_error = TDB_ERR_NOEXIST;
266                 attr->log.fn = tdb->log_fn;
267                 attr->log.data = tdb->log_data;
268                 break;
269         case TDB_ATTRIBUTE_HASH:
270                 attr->hash.fn = tdb->hash_fn;
271                 attr->hash.data = tdb->hash_data;
272                 break;
273         case TDB_ATTRIBUTE_SEED:
274                 if (tdb->flags & TDB_VERSION1)
275                         return tdb->last_error
276                                 = tdb_logerr(tdb, TDB_ERR_EINVAL,
277                                              TDB_LOG_USE_ERROR,
278                                      "tdb_get_attribute:"
279                                      " cannot get TDB_ATTRIBUTE_SEED"
280                                      " on TDB1 tdb.");
281                 attr->seed.seed = tdb->hash_seed;
282                 break;
283         case TDB_ATTRIBUTE_OPENHOOK:
284                 if (!tdb->openhook)
285                         return tdb->last_error = TDB_ERR_NOEXIST;
286                 attr->openhook.fn = tdb->openhook;
287                 attr->openhook.data = tdb->openhook_data;
288                 break;
289         case TDB_ATTRIBUTE_STATS: {
290                 size_t size = attr->stats.size;
291                 if (size > tdb->stats.size)
292                         size = tdb->stats.size;
293                 memcpy(&attr->stats, &tdb->stats, size);
294                 break;
295         }
296         case TDB_ATTRIBUTE_FLOCK:
297                 attr->flock.lock = tdb->lock_fn;
298                 attr->flock.unlock = tdb->unlock_fn;
299                 attr->flock.data = tdb->lock_data;
300                 break;
301         case TDB_ATTRIBUTE_TDB1_HASHSIZE:
302                 if (!(tdb->flags & TDB_VERSION1))
303                         return tdb->last_error
304                                 = tdb_logerr(tdb, TDB_ERR_EINVAL,
305                                              TDB_LOG_USE_ERROR,
306                                      "tdb_get_attribute:"
307                                      " cannot get TDB_ATTRIBUTE_TDB1_HASHSIZE"
308                                      " on TDB2 tdb.");
309                 attr->tdb1_hashsize.hsize = tdb->tdb1.header.hash_size;
310                 break;
311         default:
312                 return tdb->last_error
313                         = tdb_logerr(tdb, TDB_ERR_EINVAL,
314                                      TDB_LOG_USE_ERROR,
315                                      "tdb_get_attribute:"
316                                      " unknown attribute type %u",
317                                      attr->base.attr);
318         }
319         attr->base.next = NULL;
320         return TDB_SUCCESS;
321 }
322
323 void tdb_unset_attribute(struct tdb_context *tdb,
324                          enum tdb_attribute_type type)
325 {
326         switch (type) {
327         case TDB_ATTRIBUTE_LOG:
328                 tdb->log_fn = NULL;
329                 break;
330         case TDB_ATTRIBUTE_OPENHOOK:
331                 tdb->openhook = NULL;
332                 break;
333         case TDB_ATTRIBUTE_HASH:
334         case TDB_ATTRIBUTE_SEED:
335         case TDB_ATTRIBUTE_TDB1_HASHSIZE:
336                 tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
337                            "tdb_unset_attribute: cannot unset %s after opening",
338                            type == TDB_ATTRIBUTE_HASH
339                            ? "TDB_ATTRIBUTE_HASH"
340                            : type == TDB_ATTRIBUTE_SEED
341                            ? "TDB_ATTRIBUTE_SEED"
342                            : "TDB_ATTRIBUTE_TDB1_HASHSIZE");
343                 break;
344         case TDB_ATTRIBUTE_STATS:
345                 tdb_logerr(tdb, TDB_ERR_EINVAL,
346                            TDB_LOG_USE_ERROR,
347                            "tdb_unset_attribute:"
348                            "cannot unset TDB_ATTRIBUTE_STATS");
349                 break;
350         case TDB_ATTRIBUTE_FLOCK:
351                 tdb->lock_fn = tdb_fcntl_lock;
352                 tdb->unlock_fn = tdb_fcntl_unlock;
353                 break;
354         default:
355                 tdb_logerr(tdb, TDB_ERR_EINVAL,
356                            TDB_LOG_USE_ERROR,
357                            "tdb_unset_attribute: unknown attribute type %u",
358                            type);
359         }
360 }
361
362 static bool is_tdb1(struct tdb1_header *hdr, const void *buf, ssize_t rlen)
363 {
364         /* This code assumes we've tried to read entire tdb1 header. */
365         BUILD_ASSERT(sizeof(*hdr) <= sizeof(struct tdb_header));
366
367         if (rlen < (ssize_t)sizeof(*hdr)) {
368                 return false;
369         }
370
371         memcpy(hdr, buf, sizeof(*hdr));
372         if (strcmp(hdr->magic_food, TDB_MAGIC_FOOD) != 0)
373                 return false;
374
375         return hdr->version == TDB1_VERSION
376                 || hdr->version == TDB1_BYTEREV(TDB1_VERSION);
377 }
378
379 /* The top three bits of the capability tell us whether it matters. */
380 enum TDB_ERROR unknown_capability(struct tdb_context *tdb, const char *caller,
381                                   tdb_off_t type)
382 {
383         if (type & TDB_CAP_NOOPEN) {
384                 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
385                                   "%s: file has unknown capability %llu",
386                                   caller, type & TDB_CAP_NOOPEN);
387         }
388
389         if ((type & TDB_CAP_NOWRITE) && !(tdb->flags & TDB_RDONLY)) {
390                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_ERROR,
391                                   "%s: file has unknown capability %llu"
392                                   " (cannot write to it)",
393                                   caller, type & TDB_CAP_NOOPEN);
394         }
395
396         if (type & TDB_CAP_NOCHECK) {
397                 tdb->flags |= TDB_CANT_CHECK;
398         }
399         return TDB_SUCCESS;
400 }
401
402 static enum TDB_ERROR capabilities_ok(struct tdb_context *tdb,
403                                       tdb_off_t capabilities)
404 {
405         tdb_off_t off, next;
406         enum TDB_ERROR ecode = TDB_SUCCESS;
407         const struct tdb_capability *cap;
408
409         /* Check capability list. */
410         for (off = capabilities; off && ecode == TDB_SUCCESS; off = next) {
411                 cap = tdb_access_read(tdb, off, sizeof(*cap), true);
412                 if (TDB_PTR_IS_ERR(cap)) {
413                         return TDB_PTR_ERR(cap);
414                 }
415
416                 switch (cap->type & TDB_CAP_TYPE_MASK) {
417                 /* We don't understand any capabilities (yet). */
418                 default:
419                         ecode = unknown_capability(tdb, "tdb_open", cap->type);
420                 }
421                 next = cap->next;
422                 tdb_access_release(tdb, cap);
423         }
424         return ecode;
425 }
426
427 struct tdb_context *tdb_open(const char *name, int tdb_flags,
428                              int open_flags, mode_t mode,
429                              union tdb_attribute *attr)
430 {
431         struct tdb_context *tdb;
432         struct stat st;
433         int saved_errno = 0;
434         uint64_t hash_test;
435         unsigned v;
436         ssize_t rlen;
437         struct tdb_header hdr;
438         struct tdb_attribute_seed *seed = NULL;
439         struct tdb_attribute_tdb1_hashsize *hsize_attr = NULL;
440         struct tdb_attribute_tdb1_max_dead *maxsize_attr = NULL;
441         tdb_bool_err berr;
442         enum TDB_ERROR ecode;
443         int openlock;
444
445         tdb = malloc(sizeof(*tdb) + (name ? strlen(name) + 1 : 0));
446         if (!tdb) {
447                 /* Can't log this */
448                 errno = ENOMEM;
449                 return NULL;
450         }
451         /* Set name immediately for logging functions. */
452         if (name) {
453                 tdb->name = strcpy((char *)(tdb + 1), name);
454         } else {
455                 tdb->name = NULL;
456         }
457         tdb->flags = tdb_flags;
458         tdb->log_fn = NULL;
459         tdb->open_flags = open_flags;
460         tdb->last_error = TDB_SUCCESS;
461         tdb->file = NULL;
462         tdb->openhook = NULL;
463         tdb->lock_fn = tdb_fcntl_lock;
464         tdb->unlock_fn = tdb_fcntl_unlock;
465         tdb->hash_fn = tdb_jenkins_hash;
466         memset(&tdb->stats, 0, sizeof(tdb->stats));
467         tdb->stats.base.attr = TDB_ATTRIBUTE_STATS;
468         tdb->stats.size = sizeof(tdb->stats);
469
470         while (attr) {
471                 switch (attr->base.attr) {
472                 case TDB_ATTRIBUTE_HASH:
473                         tdb->hash_fn = attr->hash.fn;
474                         tdb->hash_data = attr->hash.data;
475                         break;
476                 case TDB_ATTRIBUTE_SEED:
477                         seed = &attr->seed;
478                         break;
479                 case TDB_ATTRIBUTE_OPENHOOK:
480                         tdb->openhook = attr->openhook.fn;
481                         tdb->openhook_data = attr->openhook.data;
482                         break;
483                 case TDB_ATTRIBUTE_TDB1_HASHSIZE:
484                         hsize_attr = &attr->tdb1_hashsize;
485                         break;
486                 case TDB_ATTRIBUTE_TDB1_MAX_DEAD:
487                         maxsize_attr = &attr->tdb1_max_dead;
488                         break;
489                 default:
490                         /* These are set as normal. */
491                         ecode = tdb_set_attribute(tdb, attr);
492                         if (ecode != TDB_SUCCESS)
493                                 goto fail;
494                 }
495                 attr = attr->base.next;
496         }
497
498         if (tdb_flags & ~(TDB_INTERNAL | TDB_NOLOCK | TDB_NOMMAP | TDB_CONVERT
499                           | TDB_NOSYNC | TDB_SEQNUM | TDB_ALLOW_NESTING
500                           | TDB_RDONLY | TDB_VERSION1)) {
501                 ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
502                                    "tdb_open: unknown flags %u", tdb_flags);
503                 goto fail;
504         }
505
506         if (hsize_attr) {
507                 if (!(tdb_flags & TDB_VERSION1) ||
508                     (!(tdb_flags & TDB_INTERNAL) && !(open_flags & O_CREAT))) {
509                         ecode = tdb_logerr(tdb, TDB_ERR_EINVAL,
510                                            TDB_LOG_USE_ERROR,
511                                            "tdb_open: can only use"
512                                            " TDB_ATTRIBUTE_TDB1_HASHSIZE when"
513                                            " creating a TDB_VERSION1 tdb");
514                         goto fail;
515                 }
516         }
517
518         if (seed) {
519                 if (tdb_flags & TDB_VERSION1) {
520                         ecode = tdb_logerr(tdb, TDB_ERR_EINVAL,
521                                            TDB_LOG_USE_ERROR,
522                                            "tdb_open:"
523                                            " cannot set TDB_ATTRIBUTE_SEED"
524                                            " on TDB1 tdb.");
525                         goto fail;
526                 } else if (!(tdb_flags & TDB_INTERNAL)
527                            && !(open_flags & O_CREAT)) {
528                         ecode = tdb_logerr(tdb, TDB_ERR_EINVAL,
529                                            TDB_LOG_USE_ERROR,
530                                            "tdb_open:"
531                                            " cannot set TDB_ATTRIBUTE_SEED"
532                                            " without O_CREAT.");
533                         goto fail;
534                 }
535         }
536
537         if ((open_flags & O_ACCMODE) == O_WRONLY) {
538                 ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
539                                    "tdb_open: can't open tdb %s write-only",
540                                    name);
541                 goto fail;
542         }
543
544         if ((open_flags & O_ACCMODE) == O_RDONLY) {
545                 openlock = F_RDLCK;
546                 tdb->flags |= TDB_RDONLY;
547         } else {
548                 if (tdb_flags & TDB_RDONLY) {
549                         ecode = tdb_logerr(tdb, TDB_ERR_EINVAL,
550                                            TDB_LOG_USE_ERROR,
551                                            "tdb_open: can't use TDB_RDONLY"
552                                            " without O_RDONLY");
553                         goto fail;
554                 }
555                 openlock = F_WRLCK;
556         }
557
558         /* internal databases don't need any of the rest. */
559         if (tdb->flags & TDB_INTERNAL) {
560                 tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP);
561                 ecode = tdb_new_file(tdb);
562                 if (ecode != TDB_SUCCESS) {
563                         goto fail;
564                 }
565                 tdb->file->fd = -1;
566                 if (tdb->flags & TDB_VERSION1)
567                         ecode = tdb1_new_database(tdb, hsize_attr, maxsize_attr);
568                 else {
569                         ecode = tdb_new_database(tdb, seed, &hdr);
570                         if (ecode == TDB_SUCCESS) {
571                                 tdb_convert(tdb, &hdr.hash_seed,
572                                             sizeof(hdr.hash_seed));
573                                 tdb->hash_seed = hdr.hash_seed;
574                                 tdb2_context_init(tdb);
575                                 tdb_ftable_init(tdb);
576                         }
577                 }
578                 if (ecode != TDB_SUCCESS) {
579                         goto fail;
580                 }
581                 return tdb;
582         }
583
584         if (stat(name, &st) != -1)
585                 tdb->file = find_file(st.st_dev, st.st_ino);
586
587         if (!tdb->file) {
588                 int fd;
589
590                 if ((fd = open(name, open_flags, mode)) == -1) {
591                         /* errno set by open(2) */
592                         saved_errno = errno;
593                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
594                                    "tdb_open: could not open file %s: %s",
595                                    name, strerror(errno));
596                         goto fail_errno;
597                 }
598
599                 /* on exec, don't inherit the fd */
600                 v = fcntl(fd, F_GETFD, 0);
601                 fcntl(fd, F_SETFD, v | FD_CLOEXEC);
602
603                 if (fstat(fd, &st) == -1) {
604                         saved_errno = errno;
605                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
606                                    "tdb_open: could not stat open %s: %s",
607                                    name, strerror(errno));
608                         close(fd);
609                         goto fail_errno;
610                 }
611
612                 ecode = tdb_new_file(tdb);
613                 if (ecode != TDB_SUCCESS) {
614                         close(fd);
615                         goto fail;
616                 }
617
618                 tdb->file->fd = fd;
619                 tdb->file->device = st.st_dev;
620                 tdb->file->inode = st.st_ino;
621                 tdb->file->map_ptr = NULL;
622                 tdb->file->map_size = 0;
623         }
624
625         /* ensure there is only one process initialising at once */
626         ecode = tdb_lock_open(tdb, openlock, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
627         if (ecode != TDB_SUCCESS) {
628                 saved_errno = errno;
629                 goto fail_errno;
630         }
631
632         /* call their open hook if they gave us one. */
633         if (tdb->openhook) {
634                 ecode = tdb->openhook(tdb->file->fd, tdb->openhook_data);
635                 if (ecode != TDB_SUCCESS) {
636                         tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
637                                    "tdb_open: open hook failed");
638                         goto fail;
639                 }
640                 open_flags |= O_CREAT;
641         }
642
643         /* If they used O_TRUNC, read will return 0. */
644         rlen = pread(tdb->file->fd, &hdr, sizeof(hdr), 0);
645         if (rlen == 0 && (open_flags & O_CREAT)) {
646                 if (tdb->flags & TDB_VERSION1) {
647                         ecode = tdb1_new_database(tdb, hsize_attr, maxsize_attr);
648                         if (ecode != TDB_SUCCESS)
649                                 goto fail;
650                         goto finished;
651                 }
652                 ecode = tdb_new_database(tdb, seed, &hdr);
653                 if (ecode != TDB_SUCCESS) {
654                         goto fail;
655                 }
656         } else if (rlen < 0) {
657                 ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
658                                    "tdb_open: error %s reading %s",
659                                    strerror(errno), name);
660                 goto fail;
661         } else if (rlen < sizeof(hdr)
662                    || strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) {
663                 if (is_tdb1(&tdb->tdb1.header, &hdr, rlen)) {
664                         ecode = tdb1_open(tdb, maxsize_attr);
665                         if (!ecode)
666                                 goto finished;
667                         goto fail;
668                 }
669                 ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
670                                    "tdb_open: %s is not a tdb file", name);
671                 goto fail;
672         }
673
674         if (hdr.version != TDB_VERSION) {
675                 if (hdr.version == bswap_64(TDB_VERSION))
676                         tdb->flags |= TDB_CONVERT;
677                 else {
678                         if (is_tdb1(&tdb->tdb1.header, &hdr, rlen)) {
679                                 ecode = tdb1_open(tdb, maxsize_attr);
680                                 if (!ecode)
681                                         goto finished;
682                                 goto fail;
683                         }
684                         /* wrong version */
685                         ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
686                                            "tdb_open:"
687                                            " %s is unknown version 0x%llx",
688                                            name, (long long)hdr.version);
689                         goto fail;
690                 }
691         } else if (tdb->flags & TDB_CONVERT) {
692                 ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
693                                    "tdb_open:"
694                                    " %s does not need TDB_CONVERT",
695                                    name);
696                 goto fail;
697         }
698
699         /* This is a version2 tdb. */
700         if (tdb->flags & TDB_VERSION1) {
701                 tdb->flags &= ~TDB_VERSION1;
702         }
703
704         tdb2_context_init(tdb);
705
706         tdb_convert(tdb, &hdr, sizeof(hdr));
707         tdb->hash_seed = hdr.hash_seed;
708         hash_test = TDB_HASH_MAGIC;
709         hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test));
710         if (hdr.hash_test != hash_test) {
711                 /* wrong hash variant */
712                 ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
713                                    "tdb_open:"
714                                    " %s uses a different hash function",
715                                    name);
716                 goto fail;
717         }
718
719         ecode = capabilities_ok(tdb, hdr.capabilities);
720         if (ecode != TDB_SUCCESS) {
721                 goto fail;
722         }
723
724         /* Clear any features we don't understand. */
725         if ((open_flags & O_ACCMODE) != O_RDONLY) {
726                 hdr.features_used &= TDB_FEATURE_MASK;
727                 ecode = tdb_write_convert(tdb, offsetof(struct tdb_header,
728                                                         features_used),
729                                           &hdr.features_used,
730                                           sizeof(hdr.features_used));
731                 if (ecode != TDB_SUCCESS)
732                         goto fail;
733         }
734
735 finished:
736         if (tdb->flags & TDB_VERSION1) {
737                 /* if needed, run recovery */
738                 if (tdb1_transaction_recover(tdb) == -1) {
739                         ecode = tdb->last_error;
740                         goto fail;
741                 }
742         }
743
744         tdb_unlock_open(tdb, openlock);
745
746         /* This makes sure we have current map_size and mmap. */
747         if (tdb->flags & TDB_VERSION1) {
748                 ecode = tdb1_probe_length(tdb);
749         } else {
750                 ecode = tdb->tdb2.io->oob(tdb, tdb->file->map_size + 1, true);
751         }
752         if (unlikely(ecode != TDB_SUCCESS))
753                 goto fail;
754
755         if (!(tdb->flags & TDB_VERSION1)) {
756                 /* Now it's fully formed, recover if necessary. */
757                 berr = tdb_needs_recovery(tdb);
758                 if (unlikely(berr != false)) {
759                         if (berr < 0) {
760                                 ecode = TDB_OFF_TO_ERR(berr);
761                                 goto fail;
762                         }
763                         ecode = tdb_lock_and_recover(tdb);
764                         if (ecode != TDB_SUCCESS) {
765                                 goto fail;
766                         }
767                 }
768
769                 ecode = tdb_ftable_init(tdb);
770                 if (ecode != TDB_SUCCESS) {
771                         goto fail;
772                 }
773         }
774
775         tdb->next = tdbs;
776         tdbs = tdb;
777         return tdb;
778
779  fail:
780         /* Map ecode to some logical errno. */
781         switch (TDB_ERR_TO_OFF(ecode)) {
782         case TDB_ERR_TO_OFF(TDB_ERR_CORRUPT):
783         case TDB_ERR_TO_OFF(TDB_ERR_IO):
784                 saved_errno = EIO;
785                 break;
786         case TDB_ERR_TO_OFF(TDB_ERR_LOCK):
787                 saved_errno = EWOULDBLOCK;
788                 break;
789         case TDB_ERR_TO_OFF(TDB_ERR_OOM):
790                 saved_errno = ENOMEM;
791                 break;
792         case TDB_ERR_TO_OFF(TDB_ERR_EINVAL):
793                 saved_errno = EINVAL;
794                 break;
795         default:
796                 saved_errno = EINVAL;
797                 break;
798         }
799
800 fail_errno:
801 #ifdef TDB_TRACE
802         close(tdb->tracefd);
803 #endif
804         if (tdb->file) {
805                 tdb_lock_cleanup(tdb);
806                 if (--tdb->file->refcnt == 0) {
807                         assert(tdb->file->num_lockrecs == 0);
808                         if (tdb->file->map_ptr) {
809                                 if (tdb->flags & TDB_INTERNAL) {
810                                         free(tdb->file->map_ptr);
811                                 } else
812                                         tdb_munmap(tdb->file);
813                         }
814                         if (close(tdb->file->fd) != 0)
815                                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
816                                            "tdb_open: failed to close tdb fd"
817                                            " on error: %s", strerror(errno));
818                         free(tdb->file->lockrecs);
819                         free(tdb->file);
820                 }
821         }
822
823         free(tdb);
824         errno = saved_errno;
825         return NULL;
826 }
827
828 int tdb_close(struct tdb_context *tdb)
829 {
830         int ret = 0;
831         struct tdb_context **i;
832
833         tdb_trace(tdb, "tdb_close");
834
835         if (tdb->flags & TDB_VERSION1) {
836                 if (tdb->tdb1.transaction) {
837                         tdb1_transaction_cancel(tdb);
838                 }
839         } else {
840                 if (tdb->tdb2.transaction) {
841                         tdb_transaction_cancel(tdb);
842                 }
843         }
844
845         if (tdb->file->map_ptr) {
846                 if (tdb->flags & TDB_INTERNAL)
847                         free(tdb->file->map_ptr);
848                 else
849                         tdb_munmap(tdb->file);
850         }
851         if (tdb->file) {
852                 tdb_lock_cleanup(tdb);
853                 if (--tdb->file->refcnt == 0) {
854                         ret = close(tdb->file->fd);
855                         free(tdb->file->lockrecs);
856                         free(tdb->file);
857                 }
858         }
859
860         /* Remove from tdbs list */
861         for (i = &tdbs; *i; i = &(*i)->next) {
862                 if (*i == tdb) {
863                         *i = tdb->next;
864                         break;
865                 }
866         }
867
868 #ifdef TDB_TRACE
869         close(tdb->tracefd);
870 #endif
871         free(tdb);
872
873         return ret;
874 }
875
876 void tdb_foreach_(int (*fn)(struct tdb_context *, void *), void *p)
877 {
878         struct tdb_context *i;
879
880         for (i = tdbs; i; i = i->next) {
881                 if (fn(i, p) != 0)
882                         break;
883         }
884 }