From: Rusty Russell Date: Thu, 26 Aug 2010 05:07:18 +0000 (+0930) Subject: tdb2: now checking a new empty database works. X-Git-Url: http://git.ozlabs.org/?p=ccan;a=commitdiff_plain;h=ebdd6451e2d7aa185e62a59fa2c72ffe36772d9a;ds=sidebyside tdb2: now checking a new empty database works. --- diff --git a/ccan/tdb2/_info b/ccan/tdb2/_info index cd7412c1..7533b0bd 100644 --- a/ccan/tdb2/_info +++ b/ccan/tdb2/_info @@ -74,6 +74,7 @@ int main(int argc, char *argv[]) printf("ccan/hash\n"); printf("ccan/likely\n"); printf("ccan/asearch\n"); + printf("ccan/build_assert\n"); return 0; } diff --git a/ccan/tdb2/check.c b/ccan/tdb2/check.c index f005a48d..e39e5083 100644 --- a/ccan/tdb2/check.c +++ b/ccan/tdb2/check.c @@ -187,7 +187,8 @@ static bool check_hash_list(struct tdb_context *tdb, num_nonzero++; } - if (num_found != num_used) { + /* free table and hash table are two of the used blocks. */ + if (num_found != num_used - 2) { tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, "tdb_check: Not all entries are in hash\n"); return false; @@ -322,11 +323,10 @@ int tdb_check(struct tdb_context *tdb, size_t num_free = 0, num_used = 0; bool hash_found = false, free_found = false; + /* This always ensures the header is uptodate. */ if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false) != 0) return -1; - update_header(tdb); - if (!check_header(tdb)) goto fail; @@ -403,9 +403,9 @@ int tdb_check(struct tdb_context *tdb, goto fail; tdb_allrecord_unlock(tdb, F_RDLCK); - return true; + return 0; fail: tdb_allrecord_unlock(tdb, F_RDLCK); - return false; + return -1; } diff --git a/ccan/tdb2/io.c b/ccan/tdb2/io.c index 5910fc54..9572e741 100644 --- a/ccan/tdb2/io.c +++ b/ccan/tdb2/io.c @@ -125,11 +125,8 @@ static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len) /* Either make a copy into pad and return that, or return ptr into mmap. */ /* Note: pad has to be a real object, so we can't get here if len * overflows size_t */ -/* FIXME: Transaction */ void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len) { - ssize_t r; - if (likely(!(tdb->flags & TDB_CONVERT))) { void *ret = tdb_direct(tdb, off, len); if (ret) @@ -139,18 +136,8 @@ void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len) if (unlikely(tdb_oob(tdb, off + len, false) == -1)) return NULL; - r = pread(tdb->fd, pad, len, off); - if (r != (ssize_t)len) { - /* Ensure ecode is set for log fn. */ - tdb->ecode = TDB_ERR_IO; - tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, - "tdb_read failed at %llu " - "len=%lld ret=%lld (%s) map_size=%lld\n", - (long long)off, (long long)len, - (long long)r, strerror(errno), - (long long)tdb->map_size); + if (tdb->methods->read(tdb, off, pad, len) == -1) return NULL; - } return tdb_convert(tdb, pad, len); } @@ -249,7 +236,7 @@ tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off) { tdb_off_t pad, *ret; - ret = tdb_get(tdb, off, &pad, sizeof(ret)); + ret = tdb_get(tdb, off, &pad, sizeof(pad)); if (!ret) { return TDB_OFF_ERR; } @@ -260,7 +247,7 @@ tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off) bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off) { while (len) { - size_t ret; + ssize_t ret; ret = pwrite(fd, buf, len, off); if (ret < 0) return false; @@ -268,13 +255,51 @@ bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off) errno = ENOSPC; return false; } - buf += ret; + buf = (char *)buf + ret; off += ret; len -= ret; } return true; } +/* Even on files, we can get partial reads due to signals. */ +bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off) +{ + while (len) { + ssize_t ret; + ret = pread(fd, buf, len, off); + if (ret < 0) + return false; + if (ret == 0) { + /* ETOOSHORT? */ + errno = EWOULDBLOCK; + return false; + } + buf = (char *)buf + ret; + off += ret; + len -= ret; + } + return true; +} + +bool tdb_read_all(int fd, void *buf, size_t len) +{ + while (len) { + ssize_t ret; + ret = read(fd, buf, len); + if (ret < 0) + return false; + if (ret == 0) { + /* ETOOSHORT? */ + errno = EWOULDBLOCK; + return false; + } + buf = (char *)buf + ret; + len -= ret; + } + return true; +} + /* write a lump of data at a specified offset */ static int tdb_write(struct tdb_context *tdb, tdb_off_t off, const void *buf, tdb_len_t len) @@ -316,15 +341,14 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, if (tdb->map_ptr) { memcpy(buf, off + (char *)tdb->map_ptr, len); } else { - ssize_t ret = pread(tdb->fd, buf, len, off); - if (ret != (ssize_t)len) { + if (!tdb_pread_all(tdb->fd, buf, len, off)) { /* Ensure ecode is set for log fn. */ tdb->ecode = TDB_ERR_IO; tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, "tdb_read failed at %lld " - "len=%lld ret=%lld (%s) map_size=%lld\n", + "len=%lld (%s) map_size=%lld\n", (long long)off, (long long)len, - (long long)ret, strerror(errno), + strerror(errno), (long long)tdb->map_size); return -1; } @@ -376,17 +400,17 @@ uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off) void *key; uint64_t klen, hash; - r = tdb_get(tdb, off, &pad, sizeof(*r)); + r = tdb_get(tdb, off, &pad, sizeof(pad)); if (!r) /* FIXME */ return 0; klen = rec_key_length(r); - key = tdb_direct(tdb, off + sizeof(*r), klen); + key = tdb_direct(tdb, off + sizeof(pad), klen); if (likely(key)) return tdb_hash(tdb, key, klen); - key = tdb_alloc_read(tdb, off + sizeof(*r), klen); + key = tdb_alloc_read(tdb, off + sizeof(pad), klen); if (unlikely(!key)) return 0; hash = tdb_hash(tdb, key, klen); diff --git a/ccan/tdb2/lock.c b/ccan/tdb2/lock.c index dca526ce..c274c110 100644 --- a/ccan/tdb2/lock.c +++ b/ccan/tdb2/lock.c @@ -436,7 +436,8 @@ static int tdb_lock_gradual(struct tdb_context *tdb, /* lock/unlock entire database. It can only be upgradable if you have some * other way of guaranteeing exclusivity (ie. transaction write lock). * Note that we don't lock the free chains: noone can get those locks - * without a hash chain lock first. */ + * without a hash chain lock first. + * The header *will be* up to date once this returns success. */ int tdb_allrecord_lock(struct tdb_context *tdb, int ltype, enum tdb_lock_flags flags, bool upgradable) { @@ -494,27 +495,27 @@ again: return -1; } + tdb->allrecord_lock.count = 1; + /* If it's upgradable, it's actually exclusive so we can treat + * it as a write lock. */ + tdb->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype; + tdb->allrecord_lock.off = upgradable; + /* Now we re-check header, holding lock. */ if (unlikely(update_header(tdb))) { - tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, hash_size); + tdb_allrecord_unlock(tdb, ltype); goto again; } /* Now check for needing recovery. */ if (unlikely(tdb_needs_recovery(tdb))) { - tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, hash_size); + tdb_allrecord_unlock(tdb, ltype); if (tdb_lock_and_recover(tdb) == -1) { return -1; } goto again; } - - tdb->allrecord_lock.count = 1; - /* If it's upgradable, it's actually exclusive so we can treat - * it as a write lock. */ - tdb->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype; - tdb->allrecord_lock.off = upgradable; return 0; } diff --git a/ccan/tdb2/private.h b/ccan/tdb2/private.h index 1fe15635..6b15636e 100644 --- a/ccan/tdb2/private.h +++ b/ccan/tdb2/private.h @@ -83,10 +83,10 @@ typedef uint64_t tdb_off_t; /* Hash chain locks. */ #define TDB_HASH_LOCK_START 2 -/* We start wih 256 hash buckets, 10 free buckets. A 1k-sized zone. */ +/* We start wih 256 hash buckets, 10 free buckets. A 4k-sized zone. */ #define INITIAL_HASH_BITS 8 #define INITIAL_FREE_BUCKETS 10 -#define INITIAL_ZONE_BITS 10 +#define INITIAL_ZONE_BITS 12 #if !HAVE_BSWAP_64 static inline uint64_t bswap_64(uint64_t x) @@ -328,6 +328,8 @@ tdb_off_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off, /* Even on files, we can get partial writes due to signals. */ bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off); +bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off); +bool tdb_read_all(int fd, void *buf, size_t len); /* Allocate and make a copy of some offset. */ void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len); diff --git a/ccan/tdb2/tdb.c b/ccan/tdb2/tdb.c index 3cee472c..43d1ef26 100644 --- a/ccan/tdb2/tdb.c +++ b/ccan/tdb2/tdb.c @@ -1,6 +1,7 @@ #include "private.h" #include #include +#include #include #include @@ -48,7 +49,7 @@ bool update_header(struct tdb_context *tdb) static uint64_t jenkins_hash(const void *key, size_t length, uint64_t seed, void *arg) { - return hash64_any(key, length, seed); + return hash64_stable((const unsigned char *)key, length, seed); } uint64_t tdb_hash(struct tdb_context *tdb, const void *ptr, size_t len) @@ -77,7 +78,7 @@ static uint64_t random_number(struct tdb_context *tdb) fd = open("/dev/urandom", O_RDONLY); if (fd >= 0) { - if (read(fd, &ret, sizeof(ret)) == sizeof(ret)) { + if (tdb_read_all(fd, &ret, sizeof(ret))) { tdb->log(tdb, TDB_DEBUG_TRACE, tdb->log_priv, "tdb_open: random from /dev/urandom\n"); close(fd); @@ -130,6 +131,7 @@ static int tdb_new_database(struct tdb_context *tdb) { /* We make it up in memory, then write it out if not internal */ struct new_database newdb; + unsigned int magic_off = offsetof(struct tdb_header, magic_food); /* Fill in the header */ newdb.hdr.version = TDB_VERSION; @@ -142,6 +144,9 @@ static int tdb_new_database(struct tdb_context *tdb) newdb.hdr.v.generation = 0; + /* The initial zone must cover the initial database size! */ + BUILD_ASSERT((1ULL << INITIAL_ZONE_BITS) >= sizeof(newdb)); + /* Free array has 1 zone, 10 buckets. All buckets empty. */ newdb.hdr.v.num_zones = 1; newdb.hdr.v.zone_bits = INITIAL_ZONE_BITS; @@ -158,6 +163,17 @@ static int tdb_new_database(struct tdb_context *tdb) sizeof(newdb.hash), sizeof(newdb.hash), 0); memset(newdb.hash, 0, sizeof(newdb.hash)); + /* Magic food */ + memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food)); + strcpy(newdb.hdr.magic_food, TDB_MAGIC_FOOD); + + /* This creates an endian-converted database, as if read from disk */ + tdb_convert(tdb, + (char *)&newdb.hdr + magic_off, + sizeof(newdb) - magic_off); + + tdb->header = newdb.hdr; + if (tdb->flags & TDB_INTERNAL) { tdb->map_size = sizeof(newdb); tdb->map_ptr = malloc(tdb->map_size); @@ -166,9 +182,6 @@ static int tdb_new_database(struct tdb_context *tdb) return -1; } memcpy(tdb->map_ptr, &newdb, tdb->map_size); - tdb->header = newdb.hdr; - /* Convert the `ondisk' version if asked. */ - tdb_convert(tdb, tdb->map_ptr, sizeof(newdb)); return 0; } if (lseek(tdb->fd, 0, SEEK_SET) == -1) @@ -177,14 +190,6 @@ static int tdb_new_database(struct tdb_context *tdb) if (ftruncate(tdb->fd, 0) == -1) return -1; - /* This creates an endian-converted header, as if read from disk */ - tdb->header = newdb.hdr; - tdb_convert(tdb, &tdb->header, sizeof(tdb->header)); - - /* Don't endian-convert the magic food! */ - memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food)); - strcpy(newdb.hdr.magic_food, TDB_MAGIC_FOOD); - if (!tdb_pwrite_all(tdb->fd, &newdb, sizeof(newdb), 0)) { tdb->ecode = TDB_ERR_IO; return -1; @@ -215,6 +220,7 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags, tdb->log_priv = NULL; tdb->khash = jenkins_hash; tdb->hash_priv = NULL; + tdb_io_init(tdb); /* FIXME */ if (attr) { @@ -246,6 +252,7 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags, goto fail; } TEST_IT(tdb->flags & TDB_CONVERT); + tdb_convert(tdb, &tdb->header, sizeof(tdb->header)); goto internal; } @@ -268,8 +275,7 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags, goto fail; /* errno set by tdb_brlock */ } - errno = 0; - if (read(tdb->fd, &tdb->header, sizeof(tdb->header)) != sizeof(tdb->header) + if (!tdb_pread_all(tdb->fd, &tdb->header, sizeof(tdb->header), 0) || strcmp(tdb->header.magic_food, TDB_MAGIC_FOOD) != 0) { if (!(open_flags & O_CREAT) || tdb_new_database(tdb) == -1) { if (errno == 0) { diff --git a/ccan/tdb2/tdb2.h b/ccan/tdb2/tdb2.h index 48c5ba65..a33022a8 100644 --- a/ccan/tdb2/tdb2.h +++ b/ccan/tdb2/tdb2.h @@ -52,8 +52,7 @@ extern "C" { #define TDB_INTERNAL 2 /* don't store on disk */ #define TDB_NOLOCK 4 /* don't do any locking */ #define TDB_NOMMAP 8 /* don't use mmap */ -#define TDB_CONVERT 16 /* convert endian (internal use) */ -#define TDB_BIGENDIAN 32 /* header is big-endian (internal use) */ +#define TDB_CONVERT 16 /* convert endian */ #define TDB_NOSYNC 64 /* don't use synchronous transactions */ #define TDB_SEQNUM 128 /* maintain a sequence number */ #define TDB_VOLATILE 256 /* Activate the per-hashchain freelist, default 5 */ diff --git a/ccan/tdb2/test/run-encode.c b/ccan/tdb2/test/run-encode.c index a6253fe4..aa2ec51a 100644 --- a/ccan/tdb2/test/run-encode.c +++ b/ccan/tdb2/test/run-encode.c @@ -3,14 +3,15 @@ #include #include #include +#include "logging.h" int main(int argc, char *argv[]) { unsigned int i; struct tdb_used_record rec; - struct tdb_context tdb = { .log = null_log_fn, .log_priv = NULL }; + struct tdb_context tdb = { .log = tap_log_fn, .log_priv = NULL }; - plan_tests(64 + 32 + 48*6); + plan_tests(64 + 32 + 48*6 + 1); /* We should be able to encode any data value. */ for (i = 0; i < 64; i++) @@ -36,5 +37,6 @@ int main(int argc, char *argv[]) ok1(rec_hash(&rec) == h); ok1(rec_magic(&rec) == TDB_MAGIC); } + ok1(tap_log_messages == 0); return exit_status(); }