X-Git-Url: https://git.ozlabs.org/?a=blobdiff_plain;f=ccan%2Ftdb2%2Fio.c;h=a973adde65df7658c50a001c75fbc931bb852153;hb=4f73f6a6dfc0d65aa9a5055683bf7baa5a7b622e;hp=5910fc543d6dd64b3df56d2ba8376752bf25b6b8;hpb=39f01834db9b6a21d076e67d1e3143ab99aaf43e;p=ccan diff --git a/ccan/tdb2/io.c b/ccan/tdb2/io.c index 5910fc54..a973adde 100644 --- a/ccan/tdb2/io.c +++ b/ccan/tdb2/io.c @@ -70,6 +70,8 @@ void tdb_mmap(struct tdb_context *tdb) static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe) { struct stat st; + int ret; + if (len <= tdb->map_size) return 0; if (tdb->flags & TDB_INTERNAL) { @@ -85,7 +87,14 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe) return -1; } - if (fstat(tdb->fd, &st) == -1) { + if (tdb_lock_expand(tdb, F_RDLCK) != 0) + return -1; + + ret = fstat(tdb->fd, &st); + + tdb_unlock_expand(tdb, F_RDLCK); + + if (ret == -1) { tdb->ecode = TDB_ERR_IO; return -1; } @@ -103,6 +112,7 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe) /* Unmap, update size, remap */ tdb_munmap(tdb); + tdb->map_size = st.st_size; tdb_mmap(tdb); return 0; @@ -125,39 +135,20 @@ static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len) /* Either make a copy into pad and return that, or return ptr into mmap. */ /* Note: pad has to be a real object, so we can't get here if len * overflows size_t */ -/* FIXME: Transaction */ void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len) { - ssize_t r; - if (likely(!(tdb->flags & TDB_CONVERT))) { void *ret = tdb_direct(tdb, off, len); if (ret) return ret; } - - if (unlikely(tdb_oob(tdb, off + len, false) == -1)) - return NULL; - - r = pread(tdb->fd, pad, len, off); - if (r != (ssize_t)len) { - /* Ensure ecode is set for log fn. */ - tdb->ecode = TDB_ERR_IO; - tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, - "tdb_read failed at %llu " - "len=%lld ret=%lld (%s) map_size=%lld\n", - (long long)off, (long long)len, - (long long)r, strerror(errno), - (long long)tdb->map_size); - return NULL; - } - return tdb_convert(tdb, pad, len); + return tdb_read_convert(tdb, off, pad, len) == -1 ? NULL : pad; } /* Endian conversion: we only ever deal with 8 byte quantities */ void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size) { - if (unlikely((tdb->flags & TDB_CONVERT))) { + if (unlikely((tdb->flags & TDB_CONVERT)) && buf) { uint64_t i, *p = (uint64_t *)buf; for (i = 0; i < size / 8; i++) p[i] = bswap_64(p[i]); @@ -170,23 +161,19 @@ void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size) uint64_t tdb_find_nonzero_off(struct tdb_context *tdb, tdb_off_t off, uint64_t num) { - uint64_t i, *val; - bool alloc = false; - - val = tdb_direct(tdb, off, num * sizeof(tdb_off_t)); - if (!unlikely(val)) { - val = tdb_alloc_read(tdb, off, num * sizeof(tdb_off_t)); - if (!val) - return num; - alloc = true; - } + uint64_t i; + const uint64_t *val; + + /* Zero vs non-zero is the same unconverted: minor optimization. */ + val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false); + if (!val) + return num; for (i = 0; i < num; i++) { if (val[i]) break; } - if (unlikely(alloc)) - free(val); + tdb_access_release(tdb, val); return i; } @@ -194,62 +181,45 @@ uint64_t tdb_find_nonzero_off(struct tdb_context *tdb, tdb_off_t off, uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off, uint64_t num) { - uint64_t i, *val; - bool alloc = false; - - val = tdb_direct(tdb, off, num * sizeof(tdb_off_t)); - if (!unlikely(val)) { - val = tdb_alloc_read(tdb, off, num * sizeof(tdb_off_t)); - if (!val) - return num; - alloc = true; - } + uint64_t i; + const uint64_t *val; + + /* Zero vs non-zero is the same unconverted: minor optimization. */ + val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false); + if (!val) + return num; for (i = 0; i < num; i++) { if (!val[i]) break; } - if (unlikely(alloc)) - free(val); + tdb_access_release(tdb, val); return i; } -static int fill(struct tdb_context *tdb, - const void *buf, size_t size, - tdb_off_t off, tdb_len_t len) -{ - while (len) { - size_t n = len > size ? size : len; - - if (!tdb_pwrite_all(tdb->fd, buf, n, off)) { - tdb->ecode = TDB_ERR_IO; - tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, - "fill write failed: giving up!\n"); - return -1; - } - len -= n; - off += n; - } - return 0; -} - int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len) { + char buf[8192] = { 0 }; void *p = tdb_direct(tdb, off, len); if (p) { memset(p, 0, len); return 0; - } else { - char buf[8192] = { 0 }; - return fill(tdb, buf, sizeof(buf), len, off); } + while (len) { + unsigned todo = len < sizeof(buf) ? len : sizeof(buf); + if (tdb->methods->write(tdb, off, buf, todo) == -1) + return -1; + len -= todo; + off += todo; + } + return 0; } tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off) { tdb_off_t pad, *ret; - ret = tdb_get(tdb, off, &pad, sizeof(ret)); + ret = tdb_get(tdb, off, &pad, sizeof(pad)); if (!ret) { return TDB_OFF_ERR; } @@ -260,7 +230,7 @@ tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off) bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off) { while (len) { - size_t ret; + ssize_t ret; ret = pwrite(fd, buf, len, off); if (ret < 0) return false; @@ -268,13 +238,51 @@ bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off) errno = ENOSPC; return false; } - buf += ret; + buf = (char *)buf + ret; off += ret; len -= ret; } return true; } +/* Even on files, we can get partial reads due to signals. */ +bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off) +{ + while (len) { + ssize_t ret; + ret = pread(fd, buf, len, off); + if (ret < 0) + return false; + if (ret == 0) { + /* ETOOSHORT? */ + errno = EWOULDBLOCK; + return false; + } + buf = (char *)buf + ret; + off += ret; + len -= ret; + } + return true; +} + +bool tdb_read_all(int fd, void *buf, size_t len) +{ + while (len) { + ssize_t ret; + ret = read(fd, buf, len); + if (ret < 0) + return false; + if (ret == 0) { + /* ETOOSHORT? */ + errno = EWOULDBLOCK; + return false; + } + buf = (char *)buf + ret; + len -= ret; + } + return true; +} + /* write a lump of data at a specified offset */ static int tdb_write(struct tdb_context *tdb, tdb_off_t off, const void *buf, tdb_len_t len) @@ -316,15 +324,14 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, if (tdb->map_ptr) { memcpy(buf, off + (char *)tdb->map_ptr, len); } else { - ssize_t ret = pread(tdb->fd, buf, len, off); - if (ret != (ssize_t)len) { + if (!tdb_pread_all(tdb->fd, buf, len, off)) { /* Ensure ecode is set for log fn. */ tdb->ecode = TDB_ERR_IO; tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, "tdb_read failed at %lld " - "len=%lld ret=%lld (%s) map_size=%lld\n", + "len=%lld (%s) map_size=%lld\n", (long long)off, (long long)len, - (long long)ret, strerror(errno), + strerror(errno), (long long)tdb->map_size); return -1; } @@ -333,9 +340,26 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, } int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off, - void *rec, size_t len) + const void *rec, size_t len) { - return tdb->methods->write(tdb, off, tdb_convert(tdb, rec, len), len); + int ret; + if (unlikely((tdb->flags & TDB_CONVERT))) { + void *conv = malloc(len); + if (!conv) { + tdb->ecode = TDB_ERR_OOM; + tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, + "tdb_write: no memory converting %zu bytes\n", + len); + return -1; + } + memcpy(conv, rec, len); + ret = tdb->methods->write(tdb, off, + tdb_convert(tdb, conv, len), len); + free(conv); + } else + ret = tdb->methods->write(tdb, off, rec, len); + + return ret; } int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off, @@ -373,56 +397,46 @@ void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len) uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off) { struct tdb_used_record pad, *r; - void *key; + const void *key; uint64_t klen, hash; - r = tdb_get(tdb, off, &pad, sizeof(*r)); + r = tdb_get(tdb, off, &pad, sizeof(pad)); if (!r) /* FIXME */ return 0; klen = rec_key_length(r); - key = tdb_direct(tdb, off + sizeof(*r), klen); - if (likely(key)) - return tdb_hash(tdb, key, klen); - - key = tdb_alloc_read(tdb, off + sizeof(*r), klen); - if (unlikely(!key)) + key = tdb_access_read(tdb, off + sizeof(pad), klen, false); + if (!key) return 0; + hash = tdb_hash(tdb, key, klen); - free(key); + tdb_access_release(tdb, key); return hash; } -/* Give a piece of tdb data to a parser */ -int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key, - tdb_off_t offset, tdb_len_t len, - int (*parser)(TDB_DATA key, TDB_DATA data, - void *private_data), - void *private_data) +static int fill(struct tdb_context *tdb, + const void *buf, size_t size, + tdb_off_t off, tdb_len_t len) { - TDB_DATA data; - int result; - bool allocated = false; - - data.dsize = len; - data.dptr = tdb_direct(tdb, offset, len); - if (unlikely(!data.dptr)) { - if (!(data.dptr = tdb_alloc_read(tdb, offset, len))) { + while (len) { + size_t n = len > size ? size : len; + + if (!tdb_pwrite_all(tdb->fd, buf, n, off)) { + tdb->ecode = TDB_ERR_IO; + tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, + "fill write failed: giving up!\n"); return -1; } - allocated = true; + len -= n; + off += n; } - result = parser(key, data, private_data); - if (unlikely(allocated)) - free(data.dptr); - return result; + return 0; } /* expand a file. we prefer to use ftruncate, as that is what posix says to use for mmap expansion */ -static int tdb_expand_file(struct tdb_context *tdb, - tdb_len_t size, tdb_len_t addition) +static int tdb_expand_file(struct tdb_context *tdb, tdb_len_t addition) { char buf[8192]; @@ -431,24 +445,48 @@ static int tdb_expand_file(struct tdb_context *tdb, return -1; } - /* If this fails, we try to fill anyway. */ - if (ftruncate(tdb->fd, size+addition)) - ; + if (tdb->flags & TDB_INTERNAL) { + char *new = realloc(tdb->map_ptr, tdb->map_size + addition); + if (!new) { + tdb->ecode = TDB_ERR_OOM; + return -1; + } + tdb->map_ptr = new; + tdb->map_size += addition; + } else { + /* Unmap before trying to write; old TDB claimed OpenBSD had + * problem with this otherwise. */ + tdb_munmap(tdb); + + /* If this fails, we try to fill anyway. */ + if (ftruncate(tdb->fd, tdb->map_size + addition)) + ; - /* now fill the file with something. This ensures that the - file isn't sparse, which would be very bad if we ran out of - disk. This must be done with write, not via mmap */ - memset(buf, 0x43, sizeof(buf)); - return fill(tdb, buf, sizeof(buf), addition, size); + /* now fill the file with something. This ensures that the + file isn't sparse, which would be very bad if we ran out of + disk. This must be done with write, not via mmap */ + memset(buf, 0x43, sizeof(buf)); + if (fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1) + return -1; + tdb->map_size += addition; + tdb_mmap(tdb); + } + return 0; } const void *tdb_access_read(struct tdb_context *tdb, - tdb_off_t off, tdb_len_t len) + tdb_off_t off, tdb_len_t len, bool convert) { - const void *ret = tdb_direct(tdb, off, len); + const void *ret = NULL; + + if (likely(!(tdb->flags & TDB_CONVERT))) + ret = tdb_direct(tdb, off, len); - if (!ret) + if (!ret) { ret = tdb_alloc_read(tdb, off, len); + if (convert) + tdb_convert(tdb, (void *)ret, len); + } return ret; } @@ -534,84 +572,6 @@ static void tdb_next_hash_chain(struct tdb_context *tdb, uint32_t *chain) (*chain) = h; } - -/* expand the database by expanding the underlying file and doing the - mmap again if necessary */ -int tdb_expand(struct tdb_context *tdb) -{ - struct tdb_record rec; - tdb_off_t offset, new_size; - - /* We have to lock every hash bucket and every free list. */ - do { - - - if (tdb_lock(tdb, -1, F_WRLCK) == -1) { - TDB_LOG((tdb, TDB_DEBUG_ERROR, "lock failed in tdb_expand\n")); - return -1; - } - - /* must know about any previous expansions by another process */ - tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1); - - /* always make room for at least 100 more records, and at - least 25% more space. Round the database up to a multiple - of the page size */ - new_size = MAX(tdb->map_size + size*100, tdb->map_size * 1.25); - size = TDB_ALIGN(new_size, tdb->page_size) - tdb->map_size; - - if (!(tdb->flags & TDB_INTERNAL)) - tdb_munmap(tdb); - - /* - * We must ensure the file is unmapped before doing this - * to ensure consistency with systems like OpenBSD where - * writes and mmaps are not consistent. - */ - - /* expand the file itself */ - if (!(tdb->flags & TDB_INTERNAL)) { - if (tdb->methods->tdb_expand_file(tdb, tdb->map_size, size) != 0) - goto fail; - } - - tdb->map_size += size; - - if (tdb->flags & TDB_INTERNAL) { - char *new_map_ptr = (char *)realloc(tdb->map_ptr, - tdb->map_size); - if (!new_map_ptr) { - tdb->map_size -= size; - goto fail; - } - tdb->map_ptr = new_map_ptr; - } else { - /* - * We must ensure the file is remapped before adding the space - * to ensure consistency with systems like OpenBSD where - * writes and mmaps are not consistent. - */ - - /* We're ok if the mmap fails as we'll fallback to read/write */ - tdb_mmap(tdb); - } - - /* form a new freelist record */ - memset(&rec,'\0',sizeof(rec)); - rec.rec_len = size - sizeof(rec); - - /* link it into the free list */ - offset = tdb->map_size - size; - if (tdb_free(tdb, offset, &rec) == -1) - goto fail; - - tdb_unlock(tdb, -1, F_WRLCK); - return 0; - fail: - tdb_unlock(tdb, -1, F_WRLCK); - return -1; -} - /* read/write a tdb_off_t */ int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d) {