X-Git-Url: https://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Ftdb2%2Fio.c;h=999922b12e517c63be16ce1b20d7d22d077f9acc;hp=9572e7412eccc4d6f3666c6ab11d786f7c08e91e;hb=6dbbfabca414018b4c5acb3e6e597d84e2b19caf;hpb=ebdd6451e2d7aa185e62a59fa2c72ffe36772d9a diff --git a/ccan/tdb2/io.c b/ccan/tdb2/io.c index 9572e741..999922b1 100644 --- a/ccan/tdb2/io.c +++ b/ccan/tdb2/io.c @@ -26,6 +26,7 @@ License along with this library; if not, see . */ #include "private.h" +#include #include void tdb_munmap(struct tdb_context *tdb) @@ -47,8 +48,7 @@ void tdb_mmap(struct tdb_context *tdb) if (tdb->flags & TDB_NOMMAP) return; - tdb->map_ptr = mmap(NULL, tdb->map_size, - PROT_READ|(tdb->read_only? 0:PROT_WRITE), + tdb->map_ptr = mmap(NULL, tdb->map_size, tdb->mmap_flags, MAP_SHARED, tdb->fd, 0); /* @@ -56,9 +56,9 @@ void tdb_mmap(struct tdb_context *tdb) */ if (tdb->map_ptr == MAP_FAILED) { tdb->map_ptr = NULL; - tdb->log(tdb, TDB_DEBUG_WARNING, tdb->log_priv, - "tdb_mmap failed for size %lld (%s)\n", - (long long)tdb->map_size, strerror(errno)); + tdb_logerr(tdb, TDB_SUCCESS, TDB_DEBUG_WARNING, + "tdb_mmap failed for size %lld (%s)", + (long long)tdb->map_size, strerror(errno)); } } @@ -70,81 +70,58 @@ void tdb_mmap(struct tdb_context *tdb) static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe) { struct stat st; + + /* We can't hold pointers during this: we could unmap! */ + assert(!tdb->direct_access + || (tdb->flags & TDB_NOLOCK) + || tdb_has_expansion_lock(tdb)); + if (len <= tdb->map_size) return 0; if (tdb->flags & TDB_INTERNAL) { if (!probe) { - /* Ensure ecode is set for log fn. */ - tdb->ecode = TDB_ERR_IO; - tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, + tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL, "tdb_oob len %lld beyond internal" - " malloc size %lld\n", + " malloc size %lld", (long long)len, (long long)tdb->map_size); } return -1; } - if (fstat(tdb->fd, &st) == -1) { - tdb->ecode = TDB_ERR_IO; + if (tdb_lock_expand(tdb, F_RDLCK) != 0) + return -1; + + if (fstat(tdb->fd, &st) != 0) { + tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL, + "Failed to fstat file: %s", strerror(errno)); + tdb_unlock_expand(tdb, F_RDLCK); return -1; } + tdb_unlock_expand(tdb, F_RDLCK); + if (st.st_size < (size_t)len) { if (!probe) { - /* Ensure ecode is set for log fn. */ - tdb->ecode = TDB_ERR_IO; - tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, - "tdb_oob len %lld beyond eof at %lld\n", - (long long)len, (long long)st.st_size); + tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL, + "tdb_oob len %zu beyond eof at %zu", + (size_t)len, st.st_size); } return -1; } /* Unmap, update size, remap */ tdb_munmap(tdb); + tdb->map_size = st.st_size; tdb_mmap(tdb); return 0; } -static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len) -{ - if (unlikely(!tdb->map_ptr)) - return NULL; - - /* FIXME: We can do a subset of this! */ - if (tdb->transaction) - return NULL; - - if (unlikely(tdb_oob(tdb, off + len, true) == -1)) - return NULL; - return (char *)tdb->map_ptr + off; -} - -/* Either make a copy into pad and return that, or return ptr into mmap. */ -/* Note: pad has to be a real object, so we can't get here if len - * overflows size_t */ -void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len) -{ - if (likely(!(tdb->flags & TDB_CONVERT))) { - void *ret = tdb_direct(tdb, off, len); - if (ret) - return ret; - } - - if (unlikely(tdb_oob(tdb, off + len, false) == -1)) - return NULL; - - if (tdb->methods->read(tdb, off, pad, len) == -1) - return NULL; - return tdb_convert(tdb, pad, len); -} - /* Endian conversion: we only ever deal with 8 byte quantities */ void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size) { - if (unlikely((tdb->flags & TDB_CONVERT))) { + if (unlikely((tdb->flags & TDB_CONVERT)) && buf) { uint64_t i, *p = (uint64_t *)buf; for (i = 0; i < size / 8; i++) p[i] = bswap_64(p[i]); @@ -152,95 +129,81 @@ void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size) return buf; } -/* Return first non-zero offset in num offset array, or num. */ /* FIXME: Return the off? */ -uint64_t tdb_find_nonzero_off(struct tdb_context *tdb, tdb_off_t off, - uint64_t num) +uint64_t tdb_find_nonzero_off(struct tdb_context *tdb, + tdb_off_t base, uint64_t start, uint64_t end) { - uint64_t i, *val; - bool alloc = false; - - val = tdb_direct(tdb, off, num * sizeof(tdb_off_t)); - if (!unlikely(val)) { - val = tdb_alloc_read(tdb, off, num * sizeof(tdb_off_t)); - if (!val) - return num; - alloc = true; - } + uint64_t i; + const uint64_t *val; - for (i = 0; i < num; i++) { + /* Zero vs non-zero is the same unconverted: minor optimization. */ + val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t), + (end - start) * sizeof(tdb_off_t), false); + if (!val) + return end; + + for (i = 0; i < (end - start); i++) { if (val[i]) break; } - if (unlikely(alloc)) - free(val); - return i; + tdb_access_release(tdb, val); + return start + i; } /* Return first zero offset in num offset array, or num. */ uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off, uint64_t num) { - uint64_t i, *val; - bool alloc = false; - - val = tdb_direct(tdb, off, num * sizeof(tdb_off_t)); - if (!unlikely(val)) { - val = tdb_alloc_read(tdb, off, num * sizeof(tdb_off_t)); - if (!val) - return num; - alloc = true; - } + uint64_t i; + const uint64_t *val; + + /* Zero vs non-zero is the same unconverted: minor optimization. */ + val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false); + if (!val) + return num; for (i = 0; i < num; i++) { if (!val[i]) break; } - if (unlikely(alloc)) - free(val); + tdb_access_release(tdb, val); return i; } -static int fill(struct tdb_context *tdb, - const void *buf, size_t size, - tdb_off_t off, tdb_len_t len) -{ - while (len) { - size_t n = len > size ? size : len; - - if (!tdb_pwrite_all(tdb->fd, buf, n, off)) { - tdb->ecode = TDB_ERR_IO; - tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, - "fill write failed: giving up!\n"); - return -1; - } - len -= n; - off += n; - } - return 0; -} - int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len) { - void *p = tdb_direct(tdb, off, len); + char buf[8192] = { 0 }; + void *p = tdb->methods->direct(tdb, off, len, true); + + assert(!tdb->read_only); if (p) { memset(p, 0, len); return 0; - } else { - char buf[8192] = { 0 }; - return fill(tdb, buf, sizeof(buf), len, off); } + while (len) { + unsigned todo = len < sizeof(buf) ? len : sizeof(buf); + if (tdb->methods->write(tdb, off, buf, todo) == -1) + return -1; + len -= todo; + off += todo; + } + return 0; } tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off) { - tdb_off_t pad, *ret; + tdb_off_t ret; - ret = tdb_get(tdb, off, &pad, sizeof(pad)); - if (!ret) { - return TDB_OFF_ERR; + if (likely(!(tdb->flags & TDB_CONVERT))) { + tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p), + false); + if (p) + return *p; } - return *ret; + + if (tdb_read_convert(tdb, off, &ret, sizeof(ret)) == -1) + return TDB_OFF_ERR; + return ret; } /* Even on files, we can get partial writes due to signals. */ @@ -262,57 +225,21 @@ bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off) return true; } -/* Even on files, we can get partial reads due to signals. */ -bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off) -{ - while (len) { - ssize_t ret; - ret = pread(fd, buf, len, off); - if (ret < 0) - return false; - if (ret == 0) { - /* ETOOSHORT? */ - errno = EWOULDBLOCK; - return false; - } - buf = (char *)buf + ret; - off += ret; - len -= ret; - } - return true; -} - -bool tdb_read_all(int fd, void *buf, size_t len) -{ - while (len) { - ssize_t ret; - ret = read(fd, buf, len); - if (ret < 0) - return false; - if (ret == 0) { - /* ETOOSHORT? */ - errno = EWOULDBLOCK; - return false; - } - buf = (char *)buf + ret; - len -= ret; - } - return true; -} - /* write a lump of data at a specified offset */ static int tdb_write(struct tdb_context *tdb, tdb_off_t off, const void *buf, tdb_len_t len) { - if (len == 0) { - return 0; - } - if (tdb->read_only) { - tdb->ecode = TDB_ERR_RDONLY; + tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_DEBUG_WARNING, + "Write to read-only database"); return -1; } + /* FIXME: Bogus optimization? */ + if (len == 0) { + return 0; + } + if (tdb->methods->oob(tdb, off + len, 0) != 0) return -1; @@ -320,10 +247,9 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off, memcpy(off + (char *)tdb->map_ptr, buf, len); } else { if (!tdb_pwrite_all(tdb->fd, buf, len, off)) { - tdb->ecode = TDB_ERR_IO; - tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, - "tdb_write failed at %llu len=%llu (%s)\n", - off, len, strerror(errno)); + tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL, + "tdb_write failed at %zu len=%zu (%s)", + (size_t)off, (size_t)len, strerror(errno)); return -1; } } @@ -341,15 +267,14 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, if (tdb->map_ptr) { memcpy(buf, off + (char *)tdb->map_ptr, len); } else { - if (!tdb_pread_all(tdb->fd, buf, len, off)) { - /* Ensure ecode is set for log fn. */ - tdb->ecode = TDB_ERR_IO; - tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, - "tdb_read failed at %lld " - "len=%lld (%s) map_size=%lld\n", - (long long)off, (long long)len, - strerror(errno), - (long long)tdb->map_size); + ssize_t r = pread(tdb->fd, buf, len, off); + if (r != len) { + tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL, + "tdb_read failed with %zi at %zu " + "len=%zu (%s) map_size=%zu", + r, (size_t)off, (size_t)len, + strerror(errno), + (size_t)tdb->map_size); return -1; } } @@ -357,9 +282,25 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, } int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off, - void *rec, size_t len) + const void *rec, size_t len) { - return tdb->methods->write(tdb, off, tdb_convert(tdb, rec, len), len); + int ret; + if (unlikely((tdb->flags & TDB_CONVERT))) { + void *conv = malloc(len); + if (!conv) { + tdb_logerr(tdb, TDB_ERR_OOM, TDB_DEBUG_FATAL, + "tdb_write: no memory converting" + " %zu bytes", len); + return -1; + } + memcpy(conv, rec, len); + ret = tdb->methods->write(tdb, off, + tdb_convert(tdb, conv, len), len); + free(conv); + } else + ret = tdb->methods->write(tdb, off, rec, len); + + return ret; } int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off, @@ -372,309 +313,230 @@ int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off, int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val) { + if (tdb->read_only) { + tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_DEBUG_WARNING, + "Write to read-only database"); + return -1; + } + + if (likely(!(tdb->flags & TDB_CONVERT))) { + tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p), + true); + if (p) { + *p = val; + return 0; + } + } return tdb_write_convert(tdb, off, &val, sizeof(val)); } -/* read a lump of data, allocating the space for it */ -void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len) +static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, + tdb_len_t len, unsigned int prefix) { void *buf; /* some systems don't like zero length malloc */ - buf = malloc(len ? len : 1); - if (unlikely(!buf)) { - tdb->ecode = TDB_ERR_OOM; - tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, - "tdb_alloc_read malloc failed len=%lld\n", - (long long)len); - } else if (unlikely(tdb->methods->read(tdb, offset, buf, len))) { + buf = malloc(prefix + len ? prefix + len : 1); + if (!buf) { + tdb_logerr(tdb, TDB_ERR_OOM, TDB_DEBUG_ERROR, + "tdb_alloc_read malloc failed len=%zu", + (size_t)(prefix + len)); + } else if (unlikely(tdb->methods->read(tdb, offset, buf+prefix, + len) == -1)) { free(buf); buf = NULL; } return buf; } -uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off) +/* read a lump of data, allocating the space for it */ +void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len) { - struct tdb_used_record pad, *r; - void *key; - uint64_t klen, hash; - - r = tdb_get(tdb, off, &pad, sizeof(pad)); - if (!r) - /* FIXME */ - return 0; - - klen = rec_key_length(r); - key = tdb_direct(tdb, off + sizeof(pad), klen); - if (likely(key)) - return tdb_hash(tdb, key, klen); - - key = tdb_alloc_read(tdb, off + sizeof(pad), klen); - if (unlikely(!key)) - return 0; - hash = tdb_hash(tdb, key, klen); - free(key); - return hash; + return _tdb_alloc_read(tdb, offset, len, 0); } -/* Give a piece of tdb data to a parser */ -int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key, - tdb_off_t offset, tdb_len_t len, - int (*parser)(TDB_DATA key, TDB_DATA data, - void *private_data), - void *private_data) +static int fill(struct tdb_context *tdb, + const void *buf, size_t size, + tdb_off_t off, tdb_len_t len) { - TDB_DATA data; - int result; - bool allocated = false; - - data.dsize = len; - data.dptr = tdb_direct(tdb, offset, len); - if (unlikely(!data.dptr)) { - if (!(data.dptr = tdb_alloc_read(tdb, offset, len))) { + while (len) { + size_t n = len > size ? size : len; + + if (!tdb_pwrite_all(tdb->fd, buf, n, off)) { + tdb_logerr(tdb, TDB_ERR_IO, TDB_DEBUG_FATAL, + "fill write failed: giving up!"); return -1; } - allocated = true; + len -= n; + off += n; } - result = parser(key, data, private_data); - if (unlikely(allocated)) - free(data.dptr); - return result; + return 0; } /* expand a file. we prefer to use ftruncate, as that is what posix says to use for mmap expansion */ -static int tdb_expand_file(struct tdb_context *tdb, - tdb_len_t size, tdb_len_t addition) +static int tdb_expand_file(struct tdb_context *tdb, tdb_len_t addition) { char buf[8192]; if (tdb->read_only) { - tdb->ecode = TDB_ERR_RDONLY; + tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_DEBUG_WARNING, + "Expand on read-only database"); return -1; } - /* If this fails, we try to fill anyway. */ - if (ftruncate(tdb->fd, size+addition)) - ; - - /* now fill the file with something. This ensures that the - file isn't sparse, which would be very bad if we ran out of - disk. This must be done with write, not via mmap */ - memset(buf, 0x43, sizeof(buf)); - return fill(tdb, buf, sizeof(buf), addition, size); -} - -const void *tdb_access_read(struct tdb_context *tdb, - tdb_off_t off, tdb_len_t len) -{ - const void *ret = tdb_direct(tdb, off, len); - - if (!ret) - ret = tdb_alloc_read(tdb, off, len); - return ret; -} - -void tdb_access_release(struct tdb_context *tdb, const void *p) -{ - if (!tdb->map_ptr - || (char *)p < (char *)tdb->map_ptr - || (char *)p >= (char *)tdb->map_ptr + tdb->map_size) - free((void *)p); -} - -#if 0 -/* write a lump of data at a specified offset */ -static int tdb_write(struct tdb_context *tdb, tdb_off_t off, - const void *buf, tdb_len_t len) -{ - if (len == 0) { - return 0; - } - - if (tdb->read_only || tdb->traverse_read) { - tdb->ecode = TDB_ERR_RDONLY; - return -1; - } + if (tdb->flags & TDB_INTERNAL) { + char *new = realloc(tdb->map_ptr, tdb->map_size + addition); + if (!new) { + tdb_logerr(tdb, TDB_ERR_OOM, TDB_DEBUG_FATAL, + "No memory to expand database"); + return -1; + } + tdb->map_ptr = new; + tdb->map_size += addition; + } else { + /* Unmap before trying to write; old TDB claimed OpenBSD had + * problem with this otherwise. */ + tdb_munmap(tdb); - if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0) - return -1; + /* If this fails, we try to fill anyway. */ + if (ftruncate(tdb->fd, tdb->map_size + addition)) + ; - if (tdb->map_ptr) { - memcpy(off + (char *)tdb->map_ptr, buf, len); - } else { - ssize_t written = pwrite(tdb->fd, buf, len, off); - if ((written != (ssize_t)len) && (written != -1)) { - /* try once more */ - tdb->ecode = TDB_ERR_IO; - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only " - "%d of %d bytes at %d, trying once more\n", - (int)written, len, off)); - written = pwrite(tdb->fd, (const char *)buf+written, - len-written, - off+written); - } - if (written == -1) { - /* Ensure ecode is set for log fn. */ - tdb->ecode = TDB_ERR_IO; - TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d " - "len=%d (%s)\n", off, len, strerror(errno))); + /* now fill the file with something. This ensures that the + file isn't sparse, which would be very bad if we ran out of + disk. This must be done with write, not via mmap */ + memset(buf, 0x43, sizeof(buf)); + if (0 || fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1) return -1; - } else if (written != (ssize_t)len) { - tdb->ecode = TDB_ERR_IO; - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to " - "write %d bytes at %d in two attempts\n", - len, off)); - return -1; - } + tdb->map_size += addition; + tdb_mmap(tdb); } return 0; } +const void *tdb_access_read(struct tdb_context *tdb, + tdb_off_t off, tdb_len_t len, bool convert) +{ + const void *ret = NULL; + if (likely(!(tdb->flags & TDB_CONVERT))) + ret = tdb->methods->direct(tdb, off, len, false); -/* - do an unlocked scan of the hash table heads to find the next non-zero head. The value - will then be confirmed with the lock held -*/ -static void tdb_next_hash_chain(struct tdb_context *tdb, uint32_t *chain) -{ - uint32_t h = *chain; - if (tdb->map_ptr) { - for (;h < tdb->header.hash_size;h++) { - if (0 != *(uint32_t *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) { - break; - } - } - } else { - uint32_t off=0; - for (;h < tdb->header.hash_size;h++) { - if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) { - break; - } + if (!ret) { + struct tdb_access_hdr *hdr; + hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr)); + if (hdr) { + hdr->next = tdb->access; + tdb->access = hdr; + ret = hdr + 1; + if (convert) + tdb_convert(tdb, (void *)ret, len); } - } - (*chain) = h; -} + } else + tdb->direct_access++; + return ret; +} -/* expand the database by expanding the underlying file and doing the - mmap again if necessary */ -int tdb_expand(struct tdb_context *tdb) +void *tdb_access_write(struct tdb_context *tdb, + tdb_off_t off, tdb_len_t len, bool convert) { - struct tdb_record rec; - tdb_off_t offset, new_size; - - /* We have to lock every hash bucket and every free list. */ - do { - - - if (tdb_lock(tdb, -1, F_WRLCK) == -1) { - TDB_LOG((tdb, TDB_DEBUG_ERROR, "lock failed in tdb_expand\n")); - return -1; - } - - /* must know about any previous expansions by another process */ - tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1); - - /* always make room for at least 100 more records, and at - least 25% more space. Round the database up to a multiple - of the page size */ - new_size = MAX(tdb->map_size + size*100, tdb->map_size * 1.25); - size = TDB_ALIGN(new_size, tdb->page_size) - tdb->map_size; + void *ret = NULL; - if (!(tdb->flags & TDB_INTERNAL)) - tdb_munmap(tdb); - - /* - * We must ensure the file is unmapped before doing this - * to ensure consistency with systems like OpenBSD where - * writes and mmaps are not consistent. - */ - - /* expand the file itself */ - if (!(tdb->flags & TDB_INTERNAL)) { - if (tdb->methods->tdb_expand_file(tdb, tdb->map_size, size) != 0) - goto fail; + if (tdb->read_only) { + tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_DEBUG_WARNING, + "Write to read-only database"); + return NULL; } - tdb->map_size += size; + if (likely(!(tdb->flags & TDB_CONVERT))) + ret = tdb->methods->direct(tdb, off, len, true); - if (tdb->flags & TDB_INTERNAL) { - char *new_map_ptr = (char *)realloc(tdb->map_ptr, - tdb->map_size); - if (!new_map_ptr) { - tdb->map_size -= size; - goto fail; + if (!ret) { + struct tdb_access_hdr *hdr; + hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr)); + if (hdr) { + hdr->next = tdb->access; + tdb->access = hdr; + hdr->off = off; + hdr->len = len; + hdr->convert = convert; + ret = hdr + 1; + if (convert) + tdb_convert(tdb, (void *)ret, len); } - tdb->map_ptr = new_map_ptr; - } else { - /* - * We must ensure the file is remapped before adding the space - * to ensure consistency with systems like OpenBSD where - * writes and mmaps are not consistent. - */ - - /* We're ok if the mmap fails as we'll fallback to read/write */ - tdb_mmap(tdb); - } + } else + tdb->direct_access++; - /* form a new freelist record */ - memset(&rec,'\0',sizeof(rec)); - rec.rec_len = size - sizeof(rec); + return ret; +} - /* link it into the free list */ - offset = tdb->map_size - size; - if (tdb_free(tdb, offset, &rec) == -1) - goto fail; +static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p) +{ + struct tdb_access_hdr **hp; - tdb_unlock(tdb, -1, F_WRLCK); - return 0; - fail: - tdb_unlock(tdb, -1, F_WRLCK); - return -1; + for (hp = &tdb->access; *hp; hp = &(*hp)->next) { + if (*hp + 1 == p) + return hp; + } + return NULL; } -/* read/write a tdb_off_t */ -int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d) +void tdb_access_release(struct tdb_context *tdb, const void *p) { - return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV()); + struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p); + + if (hp) { + hdr = *hp; + *hp = hdr->next; + free(hdr); + } else + tdb->direct_access--; } -int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d) +int tdb_access_commit(struct tdb_context *tdb, void *p) { - tdb_off_t off = *d; - return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d)); -} + struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p); + int ret = 0; + + if (hp) { + hdr = *hp; + if (hdr->convert) + ret = tdb_write_convert(tdb, hdr->off, p, hdr->len); + else + ret = tdb_write(tdb, hdr->off, p, hdr->len); + *hp = hdr->next; + free(hdr); + } else + tdb->direct_access--; + return ret; +} -/* read/write a record */ -int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec) +static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len, + bool write_mode) { - if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1) - return -1; - if (TDB_BAD_MAGIC(rec)) { - /* Ensure ecode is set for log fn. */ - tdb->ecode = TDB_ERR_CORRUPT; - TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset)); - return -1; - } - return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0); + if (unlikely(!tdb->map_ptr)) + return NULL; + + if (unlikely(tdb_oob(tdb, off + len, true) == -1)) + return NULL; + return (char *)tdb->map_ptr + off; } -int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec) +void add_stat_(struct tdb_context *tdb, uint64_t *s, size_t val) { - struct tdb_record r = *rec; - return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r)); + if ((uintptr_t)s < (uintptr_t)tdb->stats + tdb->stats->size) + *s += val; } -#endif static const struct tdb_methods io_methods = { tdb_read, tdb_write, tdb_oob, tdb_expand_file, + tdb_direct, }; /*