X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Ftdb2%2Flock.c;h=15c97e3b98343b0a02ee910bdac0ec93664e3527;hp=c274c11051cc416bec0977319dec5f9ed712f1ec;hb=86028553af9bac5bca6aebf5f9d29b1494c8c446;hpb=ebdd6451e2d7aa185e62a59fa2c72ffe36772d9a

diff --git a/ccan/tdb2/lock.c b/ccan/tdb2/lock.c
index c274c110..15c97e3b 100644
--- a/ccan/tdb2/lock.c
+++ b/ccan/tdb2/lock.c
@@ -26,6 +26,8 @@
 */
 
 #include "private.h"
+#include <assert.h>
+#include <ccan/build_assert/build_assert.h>
 
 static int fcntl_lock(struct tdb_context *tdb,
 		      int rw, off_t off, off_t len, bool waitflag)
@@ -255,14 +257,14 @@ static int tdb_nest_lock(struct tdb_context *tdb, tdb_off_t offset, int ltype,
 {
 	struct tdb_lock_type *new_lck;
 
-	if (offset >= TDB_HASH_LOCK_START + (1ULL << tdb->header.v.hash_bits)
-	    + (tdb->header.v.num_zones * (tdb->header.v.free_buckets+1))) {
+	if (offset >= TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE + tdb->map_size / 8) {
 		tdb->ecode = TDB_ERR_LOCK;
 		tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
-			 "tdb_lock: invalid offset %llu for ltype=%d\n",
+			 "tdb_lock: invalid offset %llu ltype=%d\n",
 			 (long long)offset, ltype);
 		return -1;
 	}
+
 	if (tdb->flags & TDB_NOLOCK)
 		return 0;
 
@@ -372,11 +374,6 @@ static int tdb_nest_unlock(struct tdb_context *tdb, tdb_off_t off, int ltype)
 	 */
 	*lck = tdb->lockrecs[--tdb->num_lockrecs];
 
-	if (tdb->num_lockrecs == 0) {
-		/* If we're not holding any locks, header can change. */
-		tdb->header_uptodate = false;
-	}
-
 	return ret;
 }
 
@@ -408,8 +405,10 @@ static int tdb_lock_gradual(struct tdb_context *tdb,
 	int ret;
 	enum tdb_lock_flags nb_flags = (flags & ~TDB_LOCK_WAIT);
 
-	if (len <= 4) {
-		/* Single record.  Just do blocking lock. */
+	if (len <= 1) {
+		/* 0 would mean to end-of-file... */
+		assert(len != 0);
+		/* Single hash.  Just do blocking lock. */
 		return tdb_brlock(tdb, ltype, off, len, flags);
 	}
 
@@ -435,14 +434,11 @@ static int tdb_lock_gradual(struct tdb_context *tdb,
 
 /* lock/unlock entire database.  It can only be upgradable if you have some
  * other way of guaranteeing exclusivity (ie. transaction write lock).
- * Note that we don't lock the free chains: noone can get those locks
- * without a hash chain lock first.
- * The header *will be* up to date once this returns success. */
+ * Note that we don't lock the free chains: currently no one can get those
+ * locks without a hash chain lock first. */
 int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
 		       enum tdb_lock_flags flags, bool upgradable)
 {
-	tdb_off_t hash_size;
-
 	/* FIXME: There are no locks on read-only dbs */
 	if (tdb->read_only) {
 		tdb->ecode = TDB_ERR_LOCK;
@@ -482,11 +478,9 @@ int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
 		return -1;
 	}
 
-	/* Lock all the hash buckets. */
 again:
-	hash_size = (1ULL << tdb->header.v.hash_bits);
-	if (tdb_lock_gradual(tdb, ltype, TDB_HASH_LOCK_START,
-			     1ULL << tdb->header.v.hash_bits, flags)) {
+	if (tdb_lock_gradual(tdb, ltype, flags, TDB_HASH_LOCK_START,
+			     TDB_HASH_LOCK_RANGE)) {
 		if (!(flags & TDB_LOCK_PROBE)) {
 			tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
 				 "tdb_lockall hashes failed (%s)\n",
@@ -501,12 +495,6 @@ again:
 	tdb->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
 	tdb->allrecord_lock.off = upgradable;
 
-	/* Now we re-check header, holding lock. */
-	if (unlikely(update_header(tdb))) {
-		tdb_allrecord_unlock(tdb, ltype);
-		goto again;
-	}
-
 	/* Now check for needing recovery. */
 	if (unlikely(tdb_needs_recovery(tdb))) {
 		tdb_allrecord_unlock(tdb, ltype);
@@ -529,11 +517,19 @@ void tdb_unlock_open(struct tdb_context *tdb)
 	tdb_nest_unlock(tdb, TDB_OPEN_LOCK, F_WRLCK);
 }
 
+int tdb_lock_expand(struct tdb_context *tdb, int ltype)
+{
+	return tdb_nest_lock(tdb, TDB_EXPANSION_LOCK, ltype, TDB_LOCK_WAIT);
+}
+
+void tdb_unlock_expand(struct tdb_context *tdb, int ltype)
+{
+	tdb_nest_unlock(tdb, TDB_EXPANSION_LOCK, ltype);
+}
+
 /* unlock entire db */
 int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype)
 {
-	tdb_off_t hash_size;
-
 	/* FIXME: There are no locks on read-only dbs */
 	if (tdb->read_only) {
 		tdb->ecode = TDB_ERR_LOCK;
@@ -568,9 +564,14 @@ int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype)
 	tdb->allrecord_lock.count = 0;
 	tdb->allrecord_lock.ltype = 0;
 
-	hash_size = (1ULL << tdb->header.v.hash_bits);
+	return tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START,
+			    TDB_HASH_LOCK_RANGE);
+}
 
-	return tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, hash_size);
+bool tdb_has_expansion_lock(struct tdb_context *tdb)
+{
+	return find_nestlock(tdb, TDB_EXPANSION_LOCK) != NULL
+		|| (tdb->flags & TDB_NOLOCK);
 }
 
 bool tdb_has_locks(struct tdb_context *tdb)
@@ -624,9 +625,15 @@ int tdb_unlockall_read(struct tdb_context *tdb)
 }
 #endif
 
-int tdb_lock_list(struct tdb_context *tdb, tdb_off_t list,
-		  int ltype, enum tdb_lock_flags waitflag)
+int tdb_lock_hashes(struct tdb_context *tdb,
+		    tdb_off_t hash_lock,
+		    tdb_len_t hash_range,
+		    int ltype, enum tdb_lock_flags waitflag)
 {
+	/* FIXME: Do this properly, using hash_range */
+	unsigned lock = TDB_HASH_LOCK_START
+		+ (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));
+
 	/* a allrecord lock allows us to avoid per chain locks */
 	if (tdb->allrecord_lock.count &&
 	    (ltype == tdb->allrecord_lock.ltype || ltype == F_RDLCK)) {
@@ -642,12 +649,16 @@ int tdb_lock_list(struct tdb_context *tdb, tdb_off_t list,
 		return -1;
 	}
 
-	/* FIXME: Should we do header_uptodate and return retry here? */
-	return tdb_nest_lock(tdb, TDB_HASH_LOCK_START + list, ltype, waitflag);
+	return tdb_nest_lock(tdb, lock, ltype, waitflag);
 }
 
-int tdb_unlock_list(struct tdb_context *tdb, tdb_off_t list, int ltype)
+int tdb_unlock_hashes(struct tdb_context *tdb,
+		      tdb_off_t hash_lock,
+		      tdb_len_t hash_range, int ltype)
 {
+	unsigned lock = TDB_HASH_LOCK_START
+		+ (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));
+
 	/* a allrecord lock allows us to avoid per chain locks */
 	if (tdb->allrecord_lock.count) {
 		if (tdb->allrecord_lock.ltype == F_RDLCK
@@ -658,17 +669,28 @@ int tdb_unlock_list(struct tdb_context *tdb, tdb_off_t list, int ltype)
 			return -1;
 		}
 		return 0;
-	} else {
-		return tdb_nest_unlock(tdb, TDB_HASH_LOCK_START + list, ltype);
 	}
+
+	return tdb_nest_unlock(tdb, lock, ltype);
+}
+
+/* Hash locks use TDB_HASH_LOCK_START + the next 30 bits.
+ * Free bucket locks begin after that; bucket offsets are sizeof(tdb_off_t)
+ * apart, so we divide.  The result is that on 32 bit systems we don't use
+ * lock values > 2^31 on files that are less than 4GB.
+ */
+static tdb_off_t free_lock_off(tdb_off_t b_off)
+{
+	return TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE + b_off / sizeof(tdb_off_t);
 }
 
-/* Free list locks come after hash locks */
-int tdb_lock_free_list(struct tdb_context *tdb, tdb_off_t flist,
-		       enum tdb_lock_flags waitflag)
+int tdb_lock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off,
+			 enum tdb_lock_flags waitflag)
 {
+	assert(b_off >= sizeof(struct tdb_header));
+
 	/* You're supposed to have a hash lock first! */
-	if (!tdb_has_locks(tdb)) {
+	if (!(tdb->flags & TDB_NOLOCK) && !tdb_has_locks(tdb)) {
 		tdb->ecode = TDB_ERR_LOCK;
 		tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
 			 "tdb_lock_free_list without lock!\n");
@@ -685,39 +707,27 @@ int tdb_lock_free_list(struct tdb_context *tdb, tdb_off_t flist,
 		return -1;
 	}
 
-	return tdb_nest_lock(tdb, TDB_HASH_LOCK_START
-			     + (1ULL << tdb->header.v.hash_bits)
-			     + flist, F_WRLCK, waitflag);
+	return tdb_nest_lock(tdb, free_lock_off(b_off), F_WRLCK, waitflag);
 }
 
-void tdb_unlock_free_list(struct tdb_context *tdb, tdb_off_t flist)
+void tdb_unlock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off)
 {
 	if (tdb->allrecord_lock.count)
 		return;
 
-	tdb_nest_unlock(tdb, TDB_HASH_LOCK_START
-			+ (1ULL << tdb->header.v.hash_bits)
-			+ flist, F_WRLCK);
+	tdb_nest_unlock(tdb, free_lock_off(b_off), F_WRLCK);
 }
 
-#if 0
-static int chainlock_loop(struct tdb_context *tdb, const TDB_DATA *key,
-			  int ltype, enum tdb_lock_flags waitflag,
-			  const char *func)
+/* Even if the entry isn't in this hash bucket, you'd have to lock this
+ * bucket to find it. */
+static int chainlock(struct tdb_context *tdb, const TDB_DATA *key,
+		     int ltype, enum tdb_lock_flags waitflag,
+		     const char *func)
 {
 	int ret;
 	uint64_t h = tdb_hash(tdb, key->dptr, key->dsize);
 
-again:
-	ret = tdb_lock_list(tdb,
-			    h & ((1ULL << tdb->header.v.hash_bits) - 1),
-			    ltype, waitflag);
-	if (likely(ret == 0) && unlikely(update_header(tdb))) {
-		tdb_unlock_list(tdb, h & ((1ULL << tdb->header.v.hash_bits)-1),
-				ltype);
-		goto again;
-	}
-
+	ret = tdb_lock_hashes(tdb, h, 1, ltype, waitflag);
 	tdb_trace_1rec(tdb, func, *key);
 	return ret;
 }
@@ -726,31 +736,30 @@ again:
    contention - it cannot guarantee how many records will be locked */
 int tdb_chainlock(struct tdb_context *tdb, TDB_DATA key)
 {
-	return chainlock_loop(tdb, &key, F_WRLCK, TDB_LOCK_WAIT,
-			      "tdb_chainlock");
+	return chainlock(tdb, &key, F_WRLCK, TDB_LOCK_WAIT, "tdb_chainlock");
+}
+
+int tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key)
+{
+	uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
+	tdb_trace_1rec(tdb, "tdb_chainunlock", key);
+	return tdb_unlock_hashes(tdb, h, 1, F_WRLCK);
 }
 
+#if 0
 /* lock/unlock one hash chain, non-blocking. This is meant to be used
    to reduce contention - it cannot guarantee how many records will be
    locked */
 int tdb_chainlock_nonblock(struct tdb_context *tdb, TDB_DATA key)
 {
-	return chainlock_loop(tdb, &key, F_WRLCK, TDB_LOCK_NOWAIT,
-			      "tdb_chainlock_nonblock");
-}
-
-int tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key)
-{
-	uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
-	tdb_trace_1rec(tdb, "tdb_chainunlock", key);
-	return tdb_unlock_list(tdb, h & ((1ULL << tdb->header.v.hash_bits)-1),
-			       F_WRLCK);
+	return chainlock(tdb, &key, F_WRLCK, TDB_LOCK_NOWAIT,
+			 "tdb_chainlock_nonblock");
 }
 
 int tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key)
 {
-	return chainlock_loop(tdb, &key, F_RDLCK, TDB_LOCK_WAIT,
-			      "tdb_chainlock_read");
+	return chainlock(tdb, &key, F_RDLCK, TDB_LOCK_WAIT,
+			 "tdb_chainlock_read");
 }
 
 int tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key)
@@ -847,3 +856,10 @@ void tdb_release_transaction_locks(struct tdb_context *tdb)
 	tdb->header_uptodate = false;
 }
 #endif
+
+void tdb_lock_init(struct tdb_context *tdb)
+{
+	tdb->num_lockrecs = 0;
+	tdb->lockrecs = NULL;
+	tdb->allrecord_lock.count = 0;
+}