git.ozlabs.org Git - ccan/blobdiff - ccan/tdb2/open.c
tdb2: make tests work in parallel.
[ccan] / ccan / tdb2 / open.c
index 9ad9e3ba9f878f8dd876623a29cf3424135b3eb6..3e3b083e59d788906169b3fc0dcaf5ae9d9a3571 100644 (file)
@@ -1,18 +1,38 @@
+ /*
+   Trivial Database 2: opening and closing TDBs
+   Copyright (C) Rusty Russell 2010
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 3 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
 #include "private.h"
+#include <ccan/build_assert/build_assert.h>
+#include <assert.h>
 
-/* all lock info, to detect double-opens (fcntl file don't nest!) */
-static struct tdb_file *files = NULL;
+/* all tdbs, to detect double-opens (fcntl file locks don't nest!) */
+static struct tdb_context *tdbs = NULL;
 
 static struct tdb_file *find_file(dev_t device, ino_t ino)
 {
-       struct tdb_file *i;
+       struct tdb_context *i;
 
-       for (i = files; i; i = i->next) {
-               if (i->device == device && i->inode == ino) {
-                       break;
+       for (i = tdbs; i; i = i->next) {
+               if (i->file->device == device && i->file->inode == ino) {
+                       i->file->refcnt++;
+                       return i->file;
                }
        }
-       return i;
+       return NULL;
 }
 
 static bool read_all(int fd, void *buf, size_t len)
@@ -76,6 +96,15 @@ static uint64_t random_number(struct tdb_context *tdb)
        return ret;
 }
 
+static void tdb2_context_init(struct tdb_context *tdb) /* set up the tdb2-only state of @tdb; returns nothing */
+{
+       /* Initialize the TDB2 fields here: I/O setup first, then the tdb->tdb2 members. */
+       tdb_io_init(tdb); /* defined elsewhere; presumably installs tdb->tdb2.io -- confirm */
+       tdb->tdb2.direct_access = 0;
+       tdb->tdb2.transaction = NULL; /* no transaction in progress (tdb_close tests this pointer) */
+       tdb->tdb2.access = NULL;
+}
+
 struct new_database {
        struct tdb_header hdr;
        struct tdb_freetable ftable;
@@ -99,12 +128,13 @@ static enum TDB_ERROR tdb_new_database(struct tdb_context *tdb,
        else
                newdb.hdr.hash_seed = random_number(tdb);
        newdb.hdr.hash_test = TDB_HASH_MAGIC;
-       newdb.hdr.hash_test = tdb->khash(&newdb.hdr.hash_test,
-                                        sizeof(newdb.hdr.hash_test),
-                                        newdb.hdr.hash_seed,
-                                        tdb->hash_priv);
+       newdb.hdr.hash_test = tdb->hash_fn(&newdb.hdr.hash_test,
+                                          sizeof(newdb.hdr.hash_test),
+                                          newdb.hdr.hash_seed,
+                                          tdb->hash_data);
        newdb.hdr.recovery = 0;
        newdb.hdr.features_used = newdb.hdr.features_offered = TDB_FEATURE_MASK;
+       newdb.hdr.seqnum = 0;
        memset(newdb.hdr.reserved, 0, sizeof(newdb.hdr.reserved));
        /* Initial hashes are empty. */
        memset(newdb.hdr.hashtable, 0, sizeof(newdb.hdr.hashtable));
@@ -132,14 +162,14 @@ static enum TDB_ERROR tdb_new_database(struct tdb_context *tdb,
        *hdr = newdb.hdr;
 
        if (tdb->flags & TDB_INTERNAL) {
-               tdb->map_size = sizeof(newdb);
-               tdb->map_ptr = malloc(tdb->map_size);
-               if (!tdb->map_ptr) {
+               tdb->file->map_size = sizeof(newdb);
+               tdb->file->map_ptr = malloc(tdb->file->map_size);
+               if (!tdb->file->map_ptr) {
                        return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
                                          "tdb_new_database:"
                                          " failed to allocate");
                }
-               memcpy(tdb->map_ptr, &newdb, tdb->map_size);
+               memcpy(tdb->file->map_ptr, &newdb, tdb->file->map_size);
                return TDB_SUCCESS;
        }
        if (lseek(tdb->file->fd, 0, SEEK_SET) == -1) {
@@ -165,6 +195,186 @@ static enum TDB_ERROR tdb_new_database(struct tdb_context *tdb,
        return TDB_SUCCESS;
 }
 
+static enum TDB_ERROR tdb_new_file(struct tdb_context *tdb) /* allocate and initialise tdb->file */
+{ /* returns TDB_SUCCESS, or the tdb_logerr() result for TDB_ERR_OOM when malloc fails */
+       tdb->file = malloc(sizeof(*tdb->file));
+       if (!tdb->file)
+               return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
+                                 "tdb_open: cannot alloc tdb_file structure");
+       tdb->file->num_lockrecs = 0; /* no lock records yet */
+       tdb->file->lockrecs = NULL;
+       tdb->file->allrecord_lock.count = 0;
+       tdb->file->refcnt = 1; /* the calling context holds the only reference */
+       tdb->file->map_ptr = NULL; /* nothing mapped; fd, device, inode and map_size are not set here */
+       return TDB_SUCCESS;
+}
+
+enum TDB_ERROR tdb_set_attribute(struct tdb_context *tdb, /* change one attribute on an existing context */
+                                const union tdb_attribute *attr)
+{ /* returns TDB_SUCCESS, or the tdb_logerr() result, which is also stored in tdb->last_error */
+       switch (attr->base.attr) {
+       case TDB_ATTRIBUTE_LOG: /* replace the logging callback and its data */
+               tdb->log_fn = attr->log.fn;
+               tdb->log_data = attr->log.data;
+               break;
+       case TDB_ATTRIBUTE_HASH:
+       case TDB_ATTRIBUTE_SEED:
+       case TDB_ATTRIBUTE_OPENHOOK:
+       case TDB_ATTRIBUTE_TDB1_HASHSIZE: /* these four are rejected here; tdb_open consumes them itself */
+               return tdb->last_error
+                       = tdb_logerr(tdb, TDB_ERR_EINVAL,
+                                    TDB_LOG_USE_ERROR,
+                                    "tdb_set_attribute:"
+                                    " cannot set %s after opening",
+                                    attr->base.attr == TDB_ATTRIBUTE_HASH /* pick the attribute name for %s */
+                                    ? "TDB_ATTRIBUTE_HASH"
+                                    : attr->base.attr == TDB_ATTRIBUTE_SEED
+                                    ? "TDB_ATTRIBUTE_SEED"
+                                    : attr->base.attr == TDB_ATTRIBUTE_OPENHOOK
+                                    ? "TDB_ATTRIBUTE_OPENHOOK"
+                                    : "TDB_ATTRIBUTE_TDB1_HASHSIZE");
+       case TDB_ATTRIBUTE_STATS: /* always rejected */
+               return tdb->last_error
+                       = tdb_logerr(tdb, TDB_ERR_EINVAL,
+                                    TDB_LOG_USE_ERROR,
+                                    "tdb_set_attribute:"
+                                    " cannot set TDB_ATTRIBUTE_STATS");
+       case TDB_ATTRIBUTE_FLOCK: /* replace the lock/unlock callbacks and their data */
+               tdb->lock_fn = attr->flock.lock;
+               tdb->unlock_fn = attr->flock.unlock;
+               tdb->lock_data = attr->flock.data;
+               break;
+       default: /* any other attribute type is an error */
+               return tdb->last_error
+                       = tdb_logerr(tdb, TDB_ERR_EINVAL,
+                                    TDB_LOG_USE_ERROR,
+                                    "tdb_set_attribute:"
+                                    " unknown attribute type %u",
+                                    attr->base.attr);
+       }
+       return TDB_SUCCESS; /* tdb->last_error is left untouched on success */
+}
+
+enum TDB_ERROR tdb_get_attribute(struct tdb_context *tdb, /* read back the attribute selected by attr->base.attr */
+                                union tdb_attribute *attr)
+{ /* fills the matching member of *attr; returns TDB_SUCCESS or an error that is also stored in tdb->last_error */
+       switch (attr->base.attr) {
+       case TDB_ATTRIBUTE_LOG:
+               if (!tdb->log_fn) /* no log function set: report TDB_ERR_NOEXIST without logging */
+                       return tdb->last_error = TDB_ERR_NOEXIST;
+               attr->log.fn = tdb->log_fn;
+               attr->log.data = tdb->log_data;
+               break;
+       case TDB_ATTRIBUTE_HASH:
+               attr->hash.fn = tdb->hash_fn;
+               attr->hash.data = tdb->hash_data;
+               break;
+       case TDB_ATTRIBUTE_SEED:
+               if (tdb->flags & TDB_VERSION1) /* the seed is refused when TDB_VERSION1 is set */
+                       return tdb->last_error
+                               = tdb_logerr(tdb, TDB_ERR_EINVAL,
+                                            TDB_LOG_USE_ERROR,
+                                    "tdb_get_attribute:"
+                                    " cannot get TDB_ATTRIBUTE_SEED"
+                                    " on TDB1 tdb.");
+               attr->seed.seed = tdb->hash_seed;
+               break;
+       case TDB_ATTRIBUTE_OPENHOOK:
+               if (!tdb->openhook) /* no hook set: report TDB_ERR_NOEXIST without logging */
+                       return tdb->last_error = TDB_ERR_NOEXIST;
+               attr->openhook.fn = tdb->openhook;
+               attr->openhook.data = tdb->openhook_data;
+               break;
+       case TDB_ATTRIBUTE_STATS: {
+               size_t size = attr->stats.size; /* size the caller stated for its stats struct */
+               if (size > tdb->stats.size)
+                       size = tdb->stats.size; /* never copy more than the context holds */
+               memcpy(&attr->stats, &tdb->stats, size); /* NOTE(review): copy starts at the struct head, so it presumably overwrites attr->stats.size too -- confirm intended */
+               break;
+       }
+       case TDB_ATTRIBUTE_FLOCK:
+               attr->flock.lock = tdb->lock_fn;
+               attr->flock.unlock = tdb->unlock_fn;
+               attr->flock.data = tdb->lock_data;
+               break;
+       case TDB_ATTRIBUTE_TDB1_HASHSIZE:
+               if (!(tdb->flags & TDB_VERSION1)) /* only available when TDB_VERSION1 is set */
+                       return tdb->last_error
+                               = tdb_logerr(tdb, TDB_ERR_EINVAL,
+                                            TDB_LOG_USE_ERROR,
+                                    "tdb_get_attribute:"
+                                    " cannot get TDB_ATTRIBUTE_TDB1_HASHSIZE"
+                                    " on TDB2 tdb.");
+               attr->tdb1_hashsize.hsize = tdb->tdb1.header.hash_size;
+               break;
+       default: /* any other attribute type is an error */
+               return tdb->last_error
+                       = tdb_logerr(tdb, TDB_ERR_EINVAL,
+                                    TDB_LOG_USE_ERROR,
+                                    "tdb_get_attribute:"
+                                    " unknown attribute type %u",
+                                    attr->base.attr);
+       }
+       attr->base.next = NULL; /* on success the result is a single attribute, not a chain */
+       return TDB_SUCCESS;
+}
+
+void tdb_unset_attribute(struct tdb_context *tdb, /* clear or reset the attribute named by @type */
+                        enum tdb_attribute_type type)
+{ /* returns nothing; unsupported types are only reported through tdb_logerr() */
+       switch (type) {
+       case TDB_ATTRIBUTE_LOG: /* drop the logging callback */
+               tdb->log_fn = NULL;
+               break;
+       case TDB_ATTRIBUTE_OPENHOOK: /* drop the open hook */
+               tdb->openhook = NULL;
+               break;
+       case TDB_ATTRIBUTE_HASH:
+       case TDB_ATTRIBUTE_SEED:
+       case TDB_ATTRIBUTE_TDB1_HASHSIZE: /* these three cannot be unset: log EINVAL */
+               tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
+                          "tdb_unset_attribute: cannot unset %s after opening",
+                          type == TDB_ATTRIBUTE_HASH /* pick the attribute name for %s */
+                          ? "TDB_ATTRIBUTE_HASH"
+                          : type == TDB_ATTRIBUTE_SEED
+                          ? "TDB_ATTRIBUTE_SEED"
+                          : "TDB_ATTRIBUTE_TDB1_HASHSIZE");
+               break;
+       case TDB_ATTRIBUTE_STATS: /* stats cannot be unset: log EINVAL */
+               tdb_logerr(tdb, TDB_ERR_EINVAL,
+                          TDB_LOG_USE_ERROR,
+                          "tdb_unset_attribute:"
+                          " cannot unset TDB_ATTRIBUTE_STATS"); /* leading space separates the two concatenated literals */
+               break;
+       case TDB_ATTRIBUTE_FLOCK: /* back to the default lock functions installed by tdb_open; lock_data is left as is */
+               tdb->lock_fn = tdb_fcntl_lock;
+               tdb->unlock_fn = tdb_fcntl_unlock;
+               break;
+       default: /* any other attribute type is reported as unknown */
+               tdb_logerr(tdb, TDB_ERR_EINVAL,
+                          TDB_LOG_USE_ERROR,
+                          "tdb_unset_attribute: unknown attribute type %u",
+                          type);
+       }
+}
+
+static bool is_tdb1(struct tdb1_header *hdr, const void *buf, ssize_t rlen) /* true if the rlen bytes at @buf begin with a TDB1 header */
+{
+       /* This code assumes we've tried to read the entire tdb1 header. */
+       BUILD_ASSERT(sizeof(*hdr) <= sizeof(struct tdb_header)); /* tdb_open reads sizeof(struct tdb_header) bytes into the buffer it passes */
+
+       if (rlen < (ssize_t)sizeof(*hdr)) { /* short or failed (negative) read: not a tdb1 file */
+               return false;
+       }
+
+       memcpy(hdr, buf, sizeof(*hdr)); /* note: *hdr is overwritten even if we go on to return false */
+       if (strcmp(hdr->magic_food, TDB_MAGIC_FOOD) != 0)
+               return false;
+
+       return hdr->version == TDB1_VERSION /* accept the version as-is or via TDB1_BYTEREV (presumably byte-swapped -- confirm) */
+               || hdr->version == TDB1_BYTEREV(TDB1_VERSION);
+}
+
 struct tdb_context *tdb_open(const char *name, int tdb_flags,
                             int open_flags, mode_t mode,
                             union tdb_attribute *attr)
@@ -177,65 +387,104 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
        ssize_t rlen;
        struct tdb_header hdr;
        struct tdb_attribute_seed *seed = NULL;
+       struct tdb_attribute_tdb1_hashsize *hsize_attr = NULL;
+       struct tdb_attribute_tdb1_max_dead *maxsize_attr = NULL;
        tdb_bool_err berr;
        enum TDB_ERROR ecode;
+       int openlock;
 
-       tdb = malloc(sizeof(*tdb));
+       tdb = malloc(sizeof(*tdb) + (name ? strlen(name) + 1 : 0));
        if (!tdb) {
                /* Can't log this */
                errno = ENOMEM;
                return NULL;
        }
-       tdb->name = NULL;
-       tdb->map_ptr = NULL;
-       tdb->direct_access = 0;
-       tdb->map_size = sizeof(struct tdb_header);
+       /* Set name immediately for logging functions. */
+       if (name) {
+               tdb->name = strcpy((char *)(tdb + 1), name);
+       } else {
+               tdb->name = NULL;
+       }
        tdb->flags = tdb_flags;
-       tdb->logfn = NULL;
-       tdb->transaction = NULL;
-       tdb->stats = NULL;
-       tdb->access = NULL;
+       tdb->log_fn = NULL;
+       tdb->open_flags = open_flags;
+       tdb->last_error = TDB_SUCCESS;
        tdb->file = NULL;
-       tdb_hash_init(tdb);
-       tdb_io_init(tdb);
+       tdb->openhook = NULL;
+       tdb->lock_fn = tdb_fcntl_lock;
+       tdb->unlock_fn = tdb_fcntl_unlock;
+       tdb->hash_fn = tdb_jenkins_hash;
+       memset(&tdb->stats, 0, sizeof(tdb->stats));
+       tdb->stats.base.attr = TDB_ATTRIBUTE_STATS;
+       tdb->stats.size = sizeof(tdb->stats);
 
        while (attr) {
                switch (attr->base.attr) {
-               case TDB_ATTRIBUTE_LOG:
-                       tdb->logfn = attr->log.log_fn;
-                       tdb->log_private = attr->log.log_private;
-                       break;
                case TDB_ATTRIBUTE_HASH:
-                       tdb->khash = attr->hash.hash_fn;
-                       tdb->hash_priv = attr->hash.hash_private;
+                       tdb->hash_fn = attr->hash.fn;
+                       tdb->hash_data = attr->hash.data;
                        break;
                case TDB_ATTRIBUTE_SEED:
                        seed = &attr->seed;
                        break;
-               case TDB_ATTRIBUTE_STATS:
-                       tdb->stats = &attr->stats;
-                       /* They have stats we don't know about?  Tell them. */
-                       if (tdb->stats->size > sizeof(attr->stats))
-                               tdb->stats->size = sizeof(attr->stats);
+               case TDB_ATTRIBUTE_OPENHOOK:
+                       tdb->openhook = attr->openhook.fn;
+                       tdb->openhook_data = attr->openhook.data;
+                       break;
+               case TDB_ATTRIBUTE_TDB1_HASHSIZE:
+                       hsize_attr = &attr->tdb1_hashsize;
+                       break;
+               case TDB_ATTRIBUTE_TDB1_MAX_DEAD:
+                       maxsize_attr = &attr->tdb1_max_dead;
                        break;
                default:
-                       ecode = tdb_logerr(tdb, TDB_ERR_EINVAL,
-                                          TDB_LOG_USE_ERROR,
-                                          "tdb_open:"
-                                          " unknown attribute type %u",
-                                          attr->base.attr);
-                       goto fail;
+                       /* These are set as normal. */
+                       ecode = tdb_set_attribute(tdb, attr);
+                       if (ecode != TDB_SUCCESS)
+                               goto fail;
                }
                attr = attr->base.next;
        }
 
        if (tdb_flags & ~(TDB_INTERNAL | TDB_NOLOCK | TDB_NOMMAP | TDB_CONVERT
-                         | TDB_NOSYNC)) {
+                         | TDB_NOSYNC | TDB_SEQNUM | TDB_ALLOW_NESTING
+                         | TDB_RDONLY | TDB_VERSION1)) {
                ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
                                   "tdb_open: unknown flags %u", tdb_flags);
                goto fail;
        }
 
+       if (hsize_attr) {
+               if (!(tdb_flags & TDB_VERSION1) ||
+                   (!(tdb_flags & TDB_INTERNAL) && !(open_flags & O_CREAT))) {
+                       ecode = tdb_logerr(tdb, TDB_ERR_EINVAL,
+                                          TDB_LOG_USE_ERROR,
+                                          "tdb_open: can only use"
+                                          " TDB_ATTRIBUTE_TDB1_HASHSIZE when"
+                                          " creating a TDB_VERSION1 tdb");
+                       goto fail;
+               }
+       }
+
+       if (seed) {
+               if (tdb_flags & TDB_VERSION1) {
+                       ecode = tdb_logerr(tdb, TDB_ERR_EINVAL,
+                                          TDB_LOG_USE_ERROR,
+                                          "tdb_open:"
+                                          " cannot set TDB_ATTRIBUTE_SEED"
+                                          " on TDB1 tdb.");
+                       goto fail;
+               } else if (!(tdb_flags & TDB_INTERNAL)
+                          && !(open_flags & O_CREAT)) {
+                       ecode = tdb_logerr(tdb, TDB_ERR_EINVAL,
+                                          TDB_LOG_USE_ERROR,
+                                          "tdb_open:"
+                                          " cannot set TDB_ATTRIBUTE_SEED"
+                                          " without O_CREAT.");
+                       goto fail;
+               }
+       }
+
        if ((open_flags & O_ACCMODE) == O_WRONLY) {
                ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
                                   "tdb_open: can't open tdb %s write-only",
@@ -244,23 +493,42 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
        }
 
        if ((open_flags & O_ACCMODE) == O_RDONLY) {
-               tdb->read_only = true;
-               tdb->mmap_flags = PROT_READ;
+               openlock = F_RDLCK;
+               tdb->flags |= TDB_RDONLY;
        } else {
-               tdb->read_only = false;
-               tdb->mmap_flags = PROT_READ | PROT_WRITE;
+               if (tdb_flags & TDB_RDONLY) {
+                       ecode = tdb_logerr(tdb, TDB_ERR_EINVAL,
+                                          TDB_LOG_USE_ERROR,
+                                          "tdb_open: can't use TDB_RDONLY"
+                                          " without O_RDONLY");
+                       goto fail;
+               }
+               openlock = F_WRLCK;
        }
 
        /* internal databases don't need any of the rest. */
        if (tdb->flags & TDB_INTERNAL) {
                tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP);
-               ecode = tdb_new_database(tdb, seed, &hdr);
+               ecode = tdb_new_file(tdb);
+               if (ecode != TDB_SUCCESS) {
+                       goto fail;
+               }
+               tdb->file->fd = -1;
+               if (tdb->flags & TDB_VERSION1)
+                       ecode = tdb1_new_database(tdb, hsize_attr, maxsize_attr);
+               else {
+                       ecode = tdb_new_database(tdb, seed, &hdr);
+                       if (ecode == TDB_SUCCESS) {
+                               tdb_convert(tdb, &hdr.hash_seed,
+                                           sizeof(hdr.hash_seed));
+                               tdb->hash_seed = hdr.hash_seed;
+                               tdb2_context_init(tdb);
+                               tdb_ftable_init(tdb);
+                       }
+               }
                if (ecode != TDB_SUCCESS) {
                        goto fail;
                }
-               tdb_convert(tdb, &hdr.hash_seed, sizeof(hdr.hash_seed));
-               tdb->hash_seed = hdr.hash_seed;
-               tdb_ftable_init(tdb);
                return tdb;
        }
 
@@ -276,7 +544,7 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
                        tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
                                   "tdb_open: could not open file %s: %s",
                                   name, strerror(errno));
-                       goto fail;
+                       goto fail_errno;
                }
 
                /* on exec, don't inherit the fd */
@@ -288,42 +556,50 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
                        tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
                                   "tdb_open: could not stat open %s: %s",
                                   name, strerror(errno));
-                       goto fail;
+                       close(fd);
+                       goto fail_errno;
                }
 
-               tdb->file = malloc(sizeof(*tdb->file));
-               if (!tdb->file) {
-                       saved_errno = ENOMEM;
-                       tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                  "tdb_open: could alloc file");
+               ecode = tdb_new_file(tdb);
+               if (ecode != TDB_SUCCESS) {
+                       close(fd);
                        goto fail;
                }
 
-               tdb->file->next = files;
-               tdb->file->num_lockrecs = 0;
-               tdb->file->lockrecs = NULL;
-               tdb->file->allrecord_lock.count = 0;
                tdb->file->fd = fd;
                tdb->file->device = st.st_dev;
                tdb->file->inode = st.st_ino;
-       } else {
-               /* FIXME */
-               ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
-                                  "tdb_open: %s (%d,%d) is already open in"
-                                  " this process",
-                                  name, (int)st.st_dev, (int)st.st_ino);
-               goto fail;
+               tdb->file->map_ptr = NULL;
+               tdb->file->map_size = 0;
        }
 
        /* ensure there is only one process initialising at once */
-       ecode = tdb_lock_open(tdb, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
+       ecode = tdb_lock_open(tdb, openlock, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
        if (ecode != TDB_SUCCESS) {
-               goto fail;
+               saved_errno = errno;
+               goto fail_errno;
+       }
+
+       /* call their open hook if they gave us one. */
+       if (tdb->openhook) {
+               ecode = tdb->openhook(tdb->file->fd, tdb->openhook_data);
+               if (ecode != TDB_SUCCESS) {
+                       tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
+                                  "tdb_open: open hook failed");
+                       goto fail;
+               }
+               open_flags |= O_CREAT;
        }
 
        /* If they used O_TRUNC, read will return 0. */
-       rlen = read(tdb->file->fd, &hdr, sizeof(hdr));
+       rlen = pread(tdb->file->fd, &hdr, sizeof(hdr), 0);
        if (rlen == 0 && (open_flags & O_CREAT)) {
+               if (tdb->flags & TDB_VERSION1) {
+                       ecode = tdb1_new_database(tdb, hsize_attr, maxsize_attr);
+                       if (ecode != TDB_SUCCESS)
+                               goto fail;
+                       goto finished;
+               }
                ecode = tdb_new_database(tdb, seed, &hdr);
                if (ecode != TDB_SUCCESS) {
                        goto fail;
@@ -335,6 +611,12 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
                goto fail;
        } else if (rlen < sizeof(hdr)
                   || strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) {
+               if (is_tdb1(&tdb->tdb1.header, &hdr, rlen)) {
+                       ecode = tdb1_open(tdb, maxsize_attr);
+                       if (!ecode)
+                               goto finished;
+                       goto fail;
+               }
                ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
                                   "tdb_open: %s is not a tdb file", name);
                goto fail;
@@ -344,6 +626,12 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
                if (hdr.version == bswap_64(TDB_VERSION))
                        tdb->flags |= TDB_CONVERT;
                else {
+                       if (is_tdb1(&tdb->tdb1.header, &hdr, rlen)) {
+                               ecode = tdb1_open(tdb, maxsize_attr);
+                               if (!ecode)
+                                       goto finished;
+                               goto fail;
+                       }
                        /* wrong version */
                        ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
                                           "tdb_open:"
@@ -351,8 +639,24 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
                                           name, (long long)hdr.version);
                        goto fail;
                }
+       } else if (tdb->flags & TDB_CONVERT) {
+               ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
+                                  "tdb_open:"
+                                  " %s does not need TDB_CONVERT",
+                                  name);
+               goto fail;
        }
 
+       if (tdb->flags & TDB_VERSION1) {
+               ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
+                                  "tdb_open:"
+                                  " %s does not need TDB_VERSION1",
+                                  name);
+               goto fail;
+       }
+
+       tdb2_context_init(tdb);
+
        tdb_convert(tdb, &hdr, sizeof(hdr));
        tdb->hash_seed = hdr.hash_seed;
        hash_test = TDB_HASH_MAGIC;
@@ -366,90 +670,103 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
                goto fail;
        }
 
-       tdb->name = strdup(name);
-       if (!tdb->name) {
-               ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
-                                  "tdb_open: failed to allocate name");
-               goto fail;
-       }
-
        /* Clear any features we don't understand. */
        if ((open_flags & O_ACCMODE) != O_RDONLY) {
                hdr.features_used &= TDB_FEATURE_MASK;
-               if (tdb_write_convert(tdb, offsetof(struct tdb_header,
-                                                   features_used),
-                                     &hdr.features_used,
-                                     sizeof(hdr.features_used)) == -1)
+               ecode = tdb_write_convert(tdb, offsetof(struct tdb_header,
+                                                       features_used),
+                                         &hdr.features_used,
+                                         sizeof(hdr.features_used));
+               if (ecode != TDB_SUCCESS)
                        goto fail;
        }
 
-       tdb_unlock_open(tdb);
+finished:
+       if (tdb->flags & TDB_VERSION1) {
+               /* if needed, run recovery */
+               if (tdb1_transaction_recover(tdb) == -1) {
+                       ecode = tdb->last_error;
+                       goto fail;
+               }
+       }
 
-       /* This make sure we have current map_size and mmap. */
-       tdb->methods->oob(tdb, tdb->map_size + 1, true);
+       tdb_unlock_open(tdb, openlock);
 
-       /* Now it's fully formed, recover if necessary. */
-       berr = tdb_needs_recovery(tdb);
-       if (unlikely(berr != false)) {
-               if (berr < 0) {
-                       ecode = berr;
-                       goto fail;
+       /* This makes sure we have current map_size and mmap. */
+       if (tdb->flags & TDB_VERSION1) {
+               ecode = tdb1_probe_length(tdb);
+       } else {
+               ecode = tdb->tdb2.io->oob(tdb, tdb->file->map_size + 1, true);
+       }
+       if (unlikely(ecode != TDB_SUCCESS))
+               goto fail;
+
+       if (!(tdb->flags & TDB_VERSION1)) {
+               /* Now it's fully formed, recover if necessary. */
+               berr = tdb_needs_recovery(tdb);
+               if (unlikely(berr != false)) {
+                       if (berr < 0) {
+                               ecode = berr;
+                               goto fail;
+                       }
+                       ecode = tdb_lock_and_recover(tdb);
+                       if (ecode != TDB_SUCCESS) {
+                               goto fail;
+                       }
                }
-               ecode = tdb_lock_and_recover(tdb);
+
+               ecode = tdb_ftable_init(tdb);
                if (ecode != TDB_SUCCESS) {
                        goto fail;
                }
        }
 
-       ecode = tdb_ftable_init(tdb);
-       if (ecode != TDB_SUCCESS) {
-               goto fail;
-       }
-
-       /* Add to linked list. */
-       files = tdb->file;
+       tdb->next = tdbs;
+       tdbs = tdb;
        return tdb;
 
  fail:
        /* Map ecode to some logical errno. */
-       if (!saved_errno) {
-               switch (ecode) {
-               case TDB_ERR_CORRUPT:
-               case TDB_ERR_IO:
-                       saved_errno = EIO;
-                       break;
-               case TDB_ERR_LOCK:
-                       saved_errno = EWOULDBLOCK;
-                       break;
-               case TDB_ERR_OOM:
-                       saved_errno = ENOMEM;
-                       break;
-               case TDB_ERR_EINVAL:
-                       saved_errno = EINVAL;
-                       break;
-               default:
-                       saved_errno = EINVAL;
-                       break;
-               }
+       switch (ecode) {
+       case TDB_ERR_CORRUPT:
+       case TDB_ERR_IO:
+               saved_errno = EIO;
+               break;
+       case TDB_ERR_LOCK:
+               saved_errno = EWOULDBLOCK;
+               break;
+       case TDB_ERR_OOM:
+               saved_errno = ENOMEM;
+               break;
+       case TDB_ERR_EINVAL:
+               saved_errno = EINVAL;
+               break;
+       default:
+               saved_errno = EINVAL;
+               break;
        }
 
+fail_errno:
 #ifdef TDB_TRACE
        close(tdb->tracefd);
 #endif
-       if (tdb->map_ptr) {
-               if (tdb->flags & TDB_INTERNAL) {
-                       free(tdb->map_ptr);
-               } else
-                       tdb_munmap(tdb);
-       }
-       free((char *)tdb->name);
        if (tdb->file) {
-               if (close(tdb->file->fd) != 0)
-                       tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
-                                  "tdb_open: failed to close tdb fd"
-                                  " on error: %s", strerror(errno));
-               free(tdb->file->lockrecs);
-               free(tdb->file);
+               tdb_lock_cleanup(tdb);
+               if (--tdb->file->refcnt == 0) {
+                       assert(tdb->file->num_lockrecs == 0);
+                       if (tdb->file->map_ptr) {
+                               if (tdb->flags & TDB_INTERNAL) {
+                                       free(tdb->file->map_ptr);
+                               } else
+                                       tdb_munmap(tdb->file);
+                       }
+                       if (close(tdb->file->fd) != 0)
+                               tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
+                                          "tdb_open: failed to close tdb fd"
+                                          " on error: %s", strerror(errno));
+                       free(tdb->file->lockrecs);
+                       free(tdb->file);
+               }
        }
 
        free(tdb);
@@ -460,33 +777,41 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
 int tdb_close(struct tdb_context *tdb)
 {
        int ret = 0;
+       struct tdb_context **i;
 
        tdb_trace(tdb, "tdb_close");
 
-       if (tdb->transaction) {
-               tdb_transaction_cancel(tdb);
+       if (tdb->flags & TDB_VERSION1) {
+               if (tdb->tdb1.transaction) {
+                       tdb1_transaction_cancel(tdb);
+               }
+       } else {
+               if (tdb->tdb2.transaction) {
+                       tdb_transaction_cancel(tdb);
+               }
        }
 
-       if (tdb->map_ptr) {
+       if (tdb->file->map_ptr) {
                if (tdb->flags & TDB_INTERNAL)
-                       free(tdb->map_ptr);
+                       free(tdb->file->map_ptr);
                else
-                       tdb_munmap(tdb);
+                       tdb_munmap(tdb->file);
        }
-       free((char *)tdb->name);
        if (tdb->file) {
-               struct tdb_file **i;
-               ret = close(tdb->file->fd);
-
-               /* Remove from files list */
-               for (i = &files; *i; i = &(*i)->next) {
-                       if (*i == tdb->file) {
-                               *i = tdb->file->next;
-                               break;
-                       }
+               tdb_lock_cleanup(tdb);
+               if (--tdb->file->refcnt == 0) {
+                       ret = close(tdb->file->fd);
+                       free(tdb->file->lockrecs);
+                       free(tdb->file);
+               }
+       }
+
+       /* Remove from tdbs list */
+       for (i = &tdbs; *i; i = &(*i)->next) {
+               if (*i == tdb) {
+                       *i = tdb->next;
+                       break;
                }
-               free(tdb->file->lockrecs);
-               free(tdb->file);
        }
 
 #ifdef TDB_TRACE
@@ -496,3 +821,13 @@ int tdb_close(struct tdb_context *tdb)
 
        return ret;
 }
+
+void tdb_foreach_(int (*fn)(struct tdb_context *, void *), void *p) /* call fn(tdb, p) for each context on the tdbs list */
+{
+       struct tdb_context *i;
+
+       for (i = tdbs; i; i = i->next) { /* i->next is read after fn returns, so fn must leave i valid */
+               if (fn(i, p) != 0) /* a non-zero return from fn stops the walk */
+                       break;
+       }
+}