git.ozlabs.org Git - ccan/blobdiff - ccan/tdb2/io.c
tdb2: rework lock.c functions to return enum TDB_ERROR.
[ccan] / ccan / tdb2 / io.c
index f1cd7e90820d2b71f5baffaf597fad04f3b71244..dffd088510c186377d433b292b63f7fca7e117d8 100644 (file)
@@ -1,4 +1,4 @@
- /* 
+ /*
    Unix SMB/CIFS implementation.
 
    trivial database library
@@ -26,6 +26,7 @@
    License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
 #include "private.h"
+#include <assert.h>
 #include <ccan/likely/likely.h>
 
 void tdb_munmap(struct tdb_context *tdb)
@@ -47,8 +48,7 @@ void tdb_mmap(struct tdb_context *tdb)
        if (tdb->flags & TDB_NOMMAP)
                return;
 
-       tdb->map_ptr = mmap(NULL, tdb->map_size, 
-                           PROT_READ|(tdb->read_only? 0:PROT_WRITE), 
+       tdb->map_ptr = mmap(NULL, tdb->map_size, tdb->mmap_flags,
                            MAP_SHARED, tdb->fd, 0);
 
        /*
@@ -56,85 +56,72 @@ void tdb_mmap(struct tdb_context *tdb)
         */
        if (tdb->map_ptr == MAP_FAILED) {
                tdb->map_ptr = NULL;
-               tdb->log(tdb, TDB_DEBUG_WARNING, tdb->log_priv,
-                        "tdb_mmap failed for size %lld (%s)\n", 
-                        (long long)tdb->map_size, strerror(errno));
+               tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
+                          "tdb_mmap failed for size %lld (%s)",
+                          (long long)tdb->map_size, strerror(errno));
        }
 }
 
 /* check for an out of bounds access - if it is out of bounds then
    see if the database has been expanded by someone else and expand
-   if necessary 
+   if necessary
    note that "len" is the minimum length needed for the db
 */
 static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe)
 {
        struct stat st;
+       enum TDB_ERROR ecode;
+
+       /* We can't hold pointers during this: we could unmap! */
+       assert(!tdb->direct_access
+              || (tdb->flags & TDB_NOLOCK)
+              || tdb_has_expansion_lock(tdb));
+
        if (len <= tdb->map_size)
                return 0;
        if (tdb->flags & TDB_INTERNAL) {
                if (!probe) {
-                       /* Ensure ecode is set for log fn. */
-                       tdb->ecode = TDB_ERR_IO;
-                       tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
+                       tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
                                 "tdb_oob len %lld beyond internal"
-                                " malloc size %lld\n",
+                                " malloc size %lld",
                                 (long long)len,
                                 (long long)tdb->map_size);
                }
                return -1;
        }
 
-       if (fstat(tdb->fd, &st) == -1) {
-               tdb->ecode = TDB_ERR_IO;
+       ecode = tdb_lock_expand(tdb, F_RDLCK);
+       if (ecode != TDB_SUCCESS) {
+               tdb->ecode = ecode;
+               return -1;
+       }
+
+       if (fstat(tdb->fd, &st) != 0) {
+               tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
+                          "Failed to fstat file: %s", strerror(errno));
+               tdb_unlock_expand(tdb, F_RDLCK);
                return -1;
        }
 
+       tdb_unlock_expand(tdb, F_RDLCK);
+
        if (st.st_size < (size_t)len) {
                if (!probe) {
-                       /* Ensure ecode is set for log fn. */
-                       tdb->ecode = TDB_ERR_IO;
-                       tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
-                                "tdb_oob len %lld beyond eof at %lld\n",
-                                (long long)len, (long long)st.st_size);
+                       tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
+                                  "tdb_oob len %zu beyond eof at %zu",
+                                  (size_t)len, st.st_size);
                }
                return -1;
        }
 
        /* Unmap, update size, remap */
        tdb_munmap(tdb);
+
        tdb->map_size = st.st_size;
        tdb_mmap(tdb);
        return 0;
 }
 
-static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len)
-{
-       if (unlikely(!tdb->map_ptr))
-               return NULL;
-
-       /* FIXME: We can do a subset of this! */
-       if (tdb->transaction)
-               return NULL;
-
-       if (unlikely(tdb_oob(tdb, off + len, true) == -1))
-               return NULL;
-       return (char *)tdb->map_ptr + off;
-}
-
-/* Either make a copy into pad and return that, or return ptr into mmap. */
-/* Note: pad has to be a real object, so we can't get here if len
- * overflows size_t */
-void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len)
-{
-       if (likely(!(tdb->flags & TDB_CONVERT))) {
-               void *ret = tdb_direct(tdb, off, len);
-               if (ret)
-                       return ret;
-       }
-       return tdb_read_convert(tdb, off, pad, len) == -1 ? NULL : pad;
-}
-
 /* Endian conversion: we only ever deal with 8 byte quantities */
 void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
 {
@@ -146,25 +133,25 @@ void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
        return buf;
 }
 
-/* Return first non-zero offset in num offset array, or num. */
 /* FIXME: Return the off? */
-uint64_t tdb_find_nonzero_off(struct tdb_context *tdb, tdb_off_t off,
-                             uint64_t num)
+uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
+                             tdb_off_t base, uint64_t start, uint64_t end)
 {
        uint64_t i;
        const uint64_t *val;
 
        /* Zero vs non-zero is the same unconverted: minor optimization. */
-       val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
+       val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
+                             (end - start) * sizeof(tdb_off_t), false);
        if (!val)
-               return num;
+               return end;
 
-       for (i = 0; i < num; i++) {
+       for (i = 0; i < (end - start); i++) {
                if (val[i])
                        break;
        }
        tdb_access_release(tdb, val);
-       return i;
+       return start + i;
 }
 
 /* Return first zero offset in num offset array, or num. */
@@ -190,14 +177,16 @@ uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
 int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
 {
        char buf[8192] = { 0 };
-       void *p = tdb_direct(tdb, off, len);
+       void *p = tdb->methods->direct(tdb, off, len, true);
+
+       assert(!tdb->read_only);
        if (p) {
                memset(p, 0, len);
                return 0;
        }
        while (len) {
                unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
-               if (tdb->methods->write(tdb, off, buf, todo) == -1)
+               if (tdb->methods->twrite(tdb, off, buf, todo) == -1)
                        return -1;
                len -= todo;
                off += todo;
@@ -207,96 +196,52 @@ int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
 
 tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
 {
-       tdb_off_t pad, *ret;
-
-       ret = tdb_get(tdb, off, &pad, sizeof(pad));
-       if (!ret) {
-               return TDB_OFF_ERR;
-       }
-       return *ret;
-}
+       tdb_off_t ret;
 
-/* Even on files, we can get partial writes due to signals. */
-bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off)
-{
-       while (len) {
-               ssize_t ret;
-               ret = pwrite(fd, buf, len, off);
-               if (ret < 0)
-                       return false;
-               if (ret == 0) {
-                       errno = ENOSPC;
-                       return false;
-               }
-               buf = (char *)buf + ret;
-               off += ret;
-               len -= ret;
-       }
-       return true;
-}
-
-/* Even on files, we can get partial reads due to signals. */
-bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off)
-{
-       while (len) {
-               ssize_t ret;
-               ret = pread(fd, buf, len, off);
-               if (ret < 0)
-                       return false;
-               if (ret == 0) {
-                       /* ETOOSHORT? */
-                       errno = EWOULDBLOCK;
-                       return false;
-               }
-               buf = (char *)buf + ret;
-               off += ret;
-               len -= ret;
+       if (likely(!(tdb->flags & TDB_CONVERT))) {
+               tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
+                                                   false);
+               if (p)
+                       return *p;
        }
-       return true;
-}
 
-bool tdb_read_all(int fd, void *buf, size_t len)
-{
-       while (len) {
-               ssize_t ret;
-               ret = read(fd, buf, len);
-               if (ret < 0)
-                       return false;
-               if (ret == 0) {
-                       /* ETOOSHORT? */
-                       errno = EWOULDBLOCK;
-                       return false;
-               }
-               buf = (char *)buf + ret;
-               len -= ret;
-       }
-       return true;
+       if (tdb_read_convert(tdb, off, &ret, sizeof(ret)) == -1)
+               return TDB_OFF_ERR;
+       return ret;
 }
 
 /* write a lump of data at a specified offset */
-static int tdb_write(struct tdb_context *tdb, tdb_off_t off, 
+static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
                     const void *buf, tdb_len_t len)
 {
-       if (len == 0) {
-               return 0;
-       }
-
        if (tdb->read_only) {
-               tdb->ecode = TDB_ERR_RDONLY;
+               tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
+                          "Write to read-only database");
                return -1;
        }
 
+       /* FIXME: Bogus optimization? */
+       if (len == 0) {
+               return 0;
+       }
+
        if (tdb->methods->oob(tdb, off + len, 0) != 0)
                return -1;
 
        if (tdb->map_ptr) {
                memcpy(off + (char *)tdb->map_ptr, buf, len);
        } else {
-               if (!tdb_pwrite_all(tdb->fd, buf, len, off)) {
-                       tdb->ecode = TDB_ERR_IO;
-                       tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
-                                "tdb_write failed at %llu len=%llu (%s)\n",
-                                off, len, strerror(errno));
+               ssize_t ret;
+               ret = pwrite(tdb->fd, buf, len, off);
+               if (ret < len) {
+                       /* This shouldn't happen: we avoid sparse files. */
+                       if (ret >= 0)
+                               errno = ENOSPC;
+
+                       tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
+                                  "tdb_write: %zi at %zu len=%zu (%s)",
+                                  ret, (size_t)off, (size_t)len,
+                                  strerror(errno));
                        return -1;
                }
        }
@@ -314,15 +259,14 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
        if (tdb->map_ptr) {
                memcpy(buf, off + (char *)tdb->map_ptr, len);
        } else {
-               if (!tdb_pread_all(tdb->fd, buf, len, off)) {
-                       /* Ensure ecode is set for log fn. */
-                       tdb->ecode = TDB_ERR_IO;
-                       tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
-                                "tdb_read failed at %lld "
-                                "len=%lld (%s) map_size=%lld\n",
-                                (long long)off, (long long)len,
-                                strerror(errno),
-                                (long long)tdb->map_size);
+               ssize_t r = pread(tdb->fd, buf, len, off);
+               if (r != len) {
+                       tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
+                                  "tdb_read failed with %zi at %zu "
+                                  "len=%zu (%s) map_size=%zu",
+                                  r, (size_t)off, (size_t)len,
+                                  strerror(errno),
+                                  (size_t)tdb->map_size);
                        return -1;
                }
        }
@@ -336,18 +280,17 @@ int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
        if (unlikely((tdb->flags & TDB_CONVERT))) {
                void *conv = malloc(len);
                if (!conv) {
-                       tdb->ecode = TDB_ERR_OOM;
-                       tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
-                                "tdb_write: no memory converting %zu bytes\n",
-                                len);
+                       tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
+                                  "tdb_write: no memory converting"
+                                  " %zu bytes", len);
                        return -1;
                }
                memcpy(conv, rec, len);
-               ret = tdb->methods->write(tdb, off,
-                                         tdb_convert(tdb, conv, len), len);
+               ret = tdb->methods->twrite(tdb, off,
+                                          tdb_convert(tdb, conv, len), len);
                free(conv);
        } else
-               ret = tdb->methods->write(tdb, off, rec, len);
+               ret = tdb->methods->twrite(tdb, off, rec, len);
 
        return ret;
 }
@@ -355,54 +298,53 @@ int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
 int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
                      void *rec, size_t len)
 {
-       int ret = tdb->methods->read(tdb, off, rec, len);
+       int ret = tdb->methods->tread(tdb, off, rec, len);
        tdb_convert(tdb, rec, len);
        return ret;
 }
 
 int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val)
 {
+       if (tdb->read_only) {
+               tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
+                          "Write to read-only database");
+               return -1;
+       }
+
+       if (likely(!(tdb->flags & TDB_CONVERT))) {
+               tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
+                                                   true);
+               if (p) {
+                       *p = val;
+                       return 0;
+               }
+       }
        return tdb_write_convert(tdb, off, &val, sizeof(val));
 }
 
-/* read a lump of data, allocating the space for it */
-void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
+static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
+                            tdb_len_t len, unsigned int prefix)
 {
        void *buf;
 
        /* some systems don't like zero length malloc */
-       buf = malloc(len ? len : 1);
-       if (unlikely(!buf)) {
-               tdb->ecode = TDB_ERR_OOM;
-               tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
-                        "tdb_alloc_read malloc failed len=%lld\n",
-                        (long long)len);
-       } else if (unlikely(tdb->methods->read(tdb, offset, buf, len))) {
+       buf = malloc(prefix + len ? prefix + len : 1);
+       if (!buf) {
+               tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
+                          "tdb_alloc_read malloc failed len=%zu",
+                          (size_t)(prefix + len));
+       } else if (unlikely(tdb->methods->tread(tdb, offset, buf+prefix, len)
+                           == -1)) {
                free(buf);
                buf = NULL;
        }
        return buf;
 }
 
-uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off)
+/* read a lump of data, allocating the space for it */
+void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
 {
-       struct tdb_used_record pad, *r;
-       const void *key;
-       uint64_t klen, hash;
-
-       r = tdb_get(tdb, off, &pad, sizeof(pad));
-       if (!r)
-               /* FIXME */
-               return 0;
-
-       klen = rec_key_length(r);
-       key = tdb_access_read(tdb, off + sizeof(pad), klen, false);
-       if (!key)
-               return 0;
-
-       hash = tdb_hash(tdb, key, klen);
-       tdb_access_release(tdb, key);
-       return hash;
+       return _tdb_alloc_read(tdb, offset, len, 0);
 }
 
 static int fill(struct tdb_context *tdb,
@@ -411,11 +353,15 @@ static int fill(struct tdb_context *tdb,
 {
        while (len) {
                size_t n = len > size ? size : len;
-
-               if (!tdb_pwrite_all(tdb->fd, buf, n, off)) {
-                       tdb->ecode = TDB_ERR_IO;
-                       tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
-                                "fill write failed: giving up!\n");
+               ssize_t ret = pwrite(tdb->fd, buf, n, off);
+               if (ret < n) {
+                       if (ret >= 0)
+                               errno = ENOSPC;
+
+                       tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
+                                  "fill failed: %zi at %zu len=%zu (%s)",
+                                  ret, (size_t)off, (size_t)len,
+                                  strerror(errno));
                        return -1;
                }
                len -= n;
@@ -431,14 +377,16 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_len_t addition)
        char buf[8192];
 
        if (tdb->read_only) {
-               tdb->ecode = TDB_ERR_RDONLY;
+               tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
+                          "Expand on read-only database");
                return -1;
        }
 
        if (tdb->flags & TDB_INTERNAL) {
                char *new = realloc(tdb->map_ptr, tdb->map_size + addition);
                if (!new) {
-                       tdb->ecode = TDB_ERR_OOM;
+                       tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
+                                  "No memory to expand database");
                        return -1;
                }
                tdb->map_ptr = new;
@@ -456,7 +404,7 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_len_t addition)
                   file isn't sparse, which would be very bad if we ran out of
                   disk. This must be done with write, not via mmap */
                memset(buf, 0x43, sizeof(buf));
-               if (fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1)
+               if (0 || fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1)
                        return -1;
                tdb->map_size += addition;
                tdb_mmap(tdb);
@@ -470,137 +418,122 @@ const void *tdb_access_read(struct tdb_context *tdb,
        const void *ret = NULL;
 
        if (likely(!(tdb->flags & TDB_CONVERT)))
-               ret = tdb_direct(tdb, off, len);
+               ret = tdb->methods->direct(tdb, off, len, false);
 
        if (!ret) {
-               ret = tdb_alloc_read(tdb, off, len);
-               if (convert)
-                       tdb_convert(tdb, (void *)ret, len);
-       }
-       return ret;
-}
+               struct tdb_access_hdr *hdr;
+               hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
+               if (hdr) {
+                       hdr->next = tdb->access;
+                       tdb->access = hdr;
+                       ret = hdr + 1;
+                       if (convert)
+                               tdb_convert(tdb, (void *)ret, len);
+               }
+       } else
+               tdb->direct_access++;
 
-void tdb_access_release(struct tdb_context *tdb, const void *p)
-{
-       if (!tdb->map_ptr
-           || (char *)p < (char *)tdb->map_ptr
-           || (char *)p >= (char *)tdb->map_ptr + tdb->map_size)
-               free((void *)p);
+       return ret;
 }
 
-#if 0
-/* write a lump of data at a specified offset */
-static int tdb_write(struct tdb_context *tdb, tdb_off_t off, 
-                    const void *buf, tdb_len_t len)
+void *tdb_access_write(struct tdb_context *tdb,
+                      tdb_off_t off, tdb_len_t len, bool convert)
 {
-       if (len == 0) {
-               return 0;
-       }
+       void *ret = NULL;
 
-       if (tdb->read_only || tdb->traverse_read) {
-               tdb->ecode = TDB_ERR_RDONLY;
-               return -1;
+       if (tdb->read_only) {
+               tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
+                          "Write to read-only database");
+               return NULL;
        }
 
-       if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
-               return -1;
+       if (likely(!(tdb->flags & TDB_CONVERT)))
+               ret = tdb->methods->direct(tdb, off, len, true);
 
-       if (tdb->map_ptr) {
-               memcpy(off + (char *)tdb->map_ptr, buf, len);
-       } else {
-               ssize_t written = pwrite(tdb->fd, buf, len, off);
-               if ((written != (ssize_t)len) && (written != -1)) {
-                       /* try once more */
-                       tdb->ecode = TDB_ERR_IO;
-                       TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
-                                "%d of %d bytes at %d, trying once more\n",
-                                (int)written, len, off));
-                       written = pwrite(tdb->fd, (const char *)buf+written,
-                                        len-written,
-                                        off+written);
-               }
-               if (written == -1) {
-                       /* Ensure ecode is set for log fn. */
-                       tdb->ecode = TDB_ERR_IO;
-                       TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d "
-                                "len=%d (%s)\n", off, len, strerror(errno)));
-                       return -1;
-               } else if (written != (ssize_t)len) {
-                       tdb->ecode = TDB_ERR_IO;
-                       TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to "
-                                "write %d bytes at %d in two attempts\n",
-                                len, off));
-                       return -1;
+       if (!ret) {
+               struct tdb_access_hdr *hdr;
+               hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
+               if (hdr) {
+                       hdr->next = tdb->access;
+                       tdb->access = hdr;
+                       hdr->off = off;
+                       hdr->len = len;
+                       hdr->convert = convert;
+                       ret = hdr + 1;
+                       if (convert)
+                               tdb_convert(tdb, (void *)ret, len);
                }
-       }
-       return 0;
-}
-
+       } else
+               tdb->direct_access++;
 
+       return ret;
+}
 
-/*
-  do an unlocked scan of the hash table heads to find the next non-zero head. The value
-  will then be confirmed with the lock held
-*/             
-static void tdb_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
+static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
 {
-       uint32_t h = *chain;
-       if (tdb->map_ptr) {
-               for (;h < tdb->header.hash_size;h++) {
-                       if (0 != *(uint32_t *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
-                               break;
-                       }
-               }
-       } else {
-               uint32_t off=0;
-               for (;h < tdb->header.hash_size;h++) {
-                       if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
-                               break;
-                       }
-               }
+       struct tdb_access_hdr **hp;
+
+       for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
+               if (*hp + 1 == p)
+                       return hp;
        }
-       (*chain) = h;
+       return NULL;
 }
 
-/* read/write a tdb_off_t */
-int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
+void tdb_access_release(struct tdb_context *tdb, const void *p)
 {
-       return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
+       struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
+
+       if (hp) {
+               hdr = *hp;
+               *hp = hdr->next;
+               free(hdr);
+       } else
+               tdb->direct_access--;
 }
 
-int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
+int tdb_access_commit(struct tdb_context *tdb, void *p)
 {
-       tdb_off_t off = *d;
-       return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
-}
+       struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p);
+       int ret = 0;
+
+       if (hp) {
+               hdr = *hp;
+               if (hdr->convert)
+                       ret = tdb_write_convert(tdb, hdr->off, p, hdr->len);
+               else
+                       ret = tdb_write(tdb, hdr->off, p, hdr->len);
+               *hp = hdr->next;
+               free(hdr);
+       } else
+               tdb->direct_access--;
 
+       return ret;
+}
 
-/* read/write a record */
-int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
+static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len,
+                       bool write_mode)
 {
-       if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
-               return -1;
-       if (TDB_BAD_MAGIC(rec)) {
-               /* Ensure ecode is set for log fn. */
-               tdb->ecode = TDB_ERR_CORRUPT;
-               TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
-               return -1;
-       }
-       return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0);
+       if (unlikely(!tdb->map_ptr))
+               return NULL;
+
+       if (unlikely(tdb_oob(tdb, off + len, true) == -1))
+               return NULL;
+       return (char *)tdb->map_ptr + off;
 }
 
-int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
+void add_stat_(struct tdb_context *tdb, uint64_t *s, size_t val)
 {
-       struct tdb_record r = *rec;
-       return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r));
+       if ((uintptr_t)s < (uintptr_t)tdb->stats + tdb->stats->size)
+               *s += val;
 }
-#endif
 
 static const struct tdb_methods io_methods = {
        tdb_read,
        tdb_write,
        tdb_oob,
        tdb_expand_file,
+       tdb_direct,
 };
 
 /*