*/
#include "private.h"
+#include <assert.h>
+#include <ccan/build_assert/build_assert.h>
static int fcntl_lock(struct tdb_context *tdb,
int rw, off_t off, off_t len, bool waitflag)
{
struct tdb_lock_type *new_lck;
- if (offset >= TDB_HASH_LOCK_START + (1ULL << tdb->header.v.hash_bits)
- + (tdb->header.v.num_zones * (tdb->header.v.free_buckets+1))) {
+ if (offset >= TDB_HASH_LOCK_START + (1 << 30) + tdb->map_size / 8) {
tdb->ecode = TDB_ERR_LOCK;
tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
- "tdb_lock: invalid offset %llu for ltype=%d\n",
+ "tdb_lock: invalid offset %llu ltype=%d\n",
(long long)offset, ltype);
return -1;
}
+
if (tdb->flags & TDB_NOLOCK)
return 0;
/* Lock all the hash buckets. */
again:
hash_size = (1ULL << tdb->header.v.hash_bits);
- if (tdb_lock_gradual(tdb, ltype, TDB_HASH_LOCK_START,
- 1ULL << tdb->header.v.hash_bits, flags)) {
+ if (tdb_lock_gradual(tdb, ltype, flags, TDB_HASH_LOCK_START,
+ hash_size)) {
if (!(flags & TDB_LOCK_PROBE)) {
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"tdb_lockall hashes failed (%s)\n",
tdb->allrecord_lock.off = upgradable;
/* Now we re-check header, holding lock. */
- if (unlikely(update_header(tdb))) {
+ if (unlikely(header_changed(tdb))) {
tdb_allrecord_unlock(tdb, ltype);
goto again;
}
tdb_nest_unlock(tdb, TDB_OPEN_LOCK, F_WRLCK);
}
+int tdb_lock_expand(struct tdb_context *tdb, int ltype)
+{ /* Take TDB_EXPANSION_LOCK with lock type ltype via tdb_nest_lock(),
+   * passing TDB_LOCK_WAIT; the result of tdb_nest_lock() is returned
+   * unchanged. */
+ return tdb_nest_lock(tdb, TDB_EXPANSION_LOCK, ltype, TDB_LOCK_WAIT);
+}
+
+void tdb_unlock_expand(struct tdb_context *tdb, int ltype)
+{ /* Counterpart of tdb_lock_expand(): drop TDB_EXPANSION_LOCK of type
+   * ltype via tdb_nest_unlock(); nothing is reported to the caller. */
+ tdb_nest_unlock(tdb, TDB_EXPANSION_LOCK, ltype);
+}
+
/* unlock entire db */
int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype)
{
}
#endif
-int tdb_lock_list(struct tdb_context *tdb, tdb_off_t list,
- int ltype, enum tdb_lock_flags waitflag)
+/* Lock the hash chain selected by masking hash with the current hash_bits.
+ *
+ * Returns the list we actually locked, or TDB_OFF_ERR on failure.  When a
+ * compatible allrecord lock is already held, the list is returned without
+ * taking a per-chain lock.  If header_changed() reports a change after the
+ * lock is taken and the hash now maps to a different list, the old chain
+ * is unlocked and the new one locked instead, so the returned value may
+ * differ from the list computed on entry. */
+tdb_off_t tdb_lock_list(struct tdb_context *tdb, uint64_t hash,
+ int ltype, enum tdb_lock_flags waitflag)
{
+ tdb_off_t list = hash & ((1ULL << tdb->header.v.hash_bits) - 1);
+ /* Header can change ONLY if we had no locks before. */
+ bool can_change = tdb->num_lockrecs == 0;
+
/* a allrecord lock allows us to avoid per chain locks */
if (tdb->allrecord_lock.count &&
(ltype == tdb->allrecord_lock.ltype || ltype == F_RDLCK)) {
- return 0;
+ return list;
}
if (tdb->allrecord_lock.count) {
"tdb_lock_list: have %s allrecordlock\n",
tdb->allrecord_lock.ltype == F_RDLCK
? "read" : "write");
- return -1;
+ return TDB_OFF_ERR;
}
- /* FIXME: Should we do header_uptodate and return retry here? */
- return tdb_nest_lock(tdb, TDB_HASH_LOCK_START + list, ltype, waitflag);
+again:
+ if (tdb_nest_lock(tdb, TDB_HASH_LOCK_START + list, ltype, waitflag))
+ return TDB_OFF_ERR;
+
+ if (can_change && unlikely(header_changed(tdb))) {
+ tdb_off_t new = hash & ((1ULL << tdb->header.v.hash_bits) - 1);
+ if (new != list) { /* we locked the wrong chain: drop it and retry */
+ tdb_nest_unlock(tdb, TDB_HASH_LOCK_START+list, ltype);
+ list = new;
+ goto again;
+ }
+ }
+ return list;
}
int tdb_unlock_list(struct tdb_context *tdb, tdb_off_t list, int ltype)
{
+ list &= ((1ULL << tdb->header.v.hash_bits) - 1);
+
/* a allrecord lock allows us to avoid per chain locks */
if (tdb->allrecord_lock.count) {
if (tdb->allrecord_lock.ltype == F_RDLCK
}
}
-/* Free list locks come after hash locks */
-int tdb_lock_free_list(struct tdb_context *tdb, tdb_off_t flist,
- enum tdb_lock_flags waitflag)
+/* Map a free bucket's offset (b_off) to the lock offset used for it.
+ *
+ * Hash locks use TDB_HASH_LOCK_START + the next 30 bits.
+ * Free bucket locks begin after that; bucket offsets are sizeof(tdb_len_t)
+ * apart, so we divide.  (The code divides by sizeof(tdb_off_t);
+ * NOTE(review): presumably the same width as tdb_len_t -- confirm.)
+ * The result is that on 32 bit systems we don't use lock values > 2^31 on
+ * files that are less than 4GB.
+ */
+static tdb_off_t free_lock_off(tdb_off_t b_off)
{
+ return TDB_HASH_LOCK_START + (1 << 30) + b_off / sizeof(tdb_off_t);
+}
+
+int tdb_lock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off,
+ enum tdb_lock_flags waitflag)
+{ /* Write-lock (F_WRLCK) the free bucket at offset b_off, using the lock
+   * offset from free_lock_off().  Returns -1 with tdb->ecode set to
+   * TDB_ERR_LOCK if TDB_NOLOCK is clear and tdb_has_locks() is false;
+   * otherwise returns tdb_nest_lock()'s result. */
+ assert(b_off >= sizeof(struct tdb_header)); /* b_off must lie past the header */
+
/* You're supposed to have a hash lock first! */
if (!(tdb->flags & TDB_NOLOCK) && !tdb_has_locks(tdb)) {
tdb->ecode = TDB_ERR_LOCK;
return -1;
}
- return tdb_nest_lock(tdb, TDB_HASH_LOCK_START
- + (1ULL << tdb->header.v.hash_bits)
- + flist, F_WRLCK, waitflag);
+ return tdb_nest_lock(tdb, free_lock_off(b_off), F_WRLCK, waitflag);
}
-void tdb_unlock_free_list(struct tdb_context *tdb, tdb_off_t flist)
+void tdb_unlock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off)
{
if (tdb->allrecord_lock.count)
return;
- tdb_nest_unlock(tdb, TDB_HASH_LOCK_START
- + (1ULL << tdb->header.v.hash_bits)
- + flist, F_WRLCK);
+ tdb_nest_unlock(tdb, free_lock_off(b_off), F_WRLCK); /* drop the F_WRLCK
+   * lock at the offset free_lock_off() gives for b_off; skipped entirely
+   * (early return above) while an allrecord lock is held */
}
-#if 0
-static int chainlock_loop(struct tdb_context *tdb, const TDB_DATA *key,
- int ltype, enum tdb_lock_flags waitflag,
- const char *func)
+/* Hash key and lock the chain it maps to through tdb_lock_list().
+ *
+ * Even if the entry isn't in this hash bucket, you'd have to lock this
+ * bucket to find it.
+ *
+ * Returns 0 on success and -1 when tdb_lock_list() reports TDB_OFF_ERR.
+ * The call is traced under the name func after the lock attempt, whether
+ * or not it succeeded. */
+static int chainlock(struct tdb_context *tdb, const TDB_DATA *key,
+ int ltype, enum tdb_lock_flags waitflag,
+ const char *func)
{
int ret;
uint64_t h = tdb_hash(tdb, key->dptr, key->dsize);
-again:
- ret = tdb_lock_list(tdb,
- h & ((1ULL << tdb->header.v.hash_bits) - 1),
- ltype, waitflag);
- if (likely(ret == 0) && unlikely(update_header(tdb))) {
- tdb_unlock_list(tdb, h & ((1ULL << tdb->header.v.hash_bits)-1),
- ltype);
- goto again;
- }
-
+ ret = tdb_lock_list(tdb, h, ltype, waitflag) == TDB_OFF_ERR ? -1 : 0;
tdb_trace_1rec(tdb, func, *key);
return ret;
}
contention - it cannot guarantee how many records will be locked */
int tdb_chainlock(struct tdb_context *tdb, TDB_DATA key)
{
- return chainlock_loop(tdb, &key, F_WRLCK, TDB_LOCK_WAIT,
- "tdb_chainlock");
+ return chainlock(tdb, &key, F_WRLCK, TDB_LOCK_WAIT, "tdb_chainlock"); /*
+   * write lock (F_WRLCK) on key's hash chain, requested with
+   * TDB_LOCK_WAIT; the result is chainlock()'s 0 / -1 */
}
+int tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key)
+{ /* Trace the call, then release the F_WRLCK chain lock for key and
+   * return tdb_unlock_list()'s result.  The full hash is passed on:
+   * tdb_unlock_list() masks it down to the list number itself. */
+ uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
+ tdb_trace_1rec(tdb, "tdb_chainunlock", key);
+ return tdb_unlock_list(tdb, h, F_WRLCK);
+}
+
+#if 0
/* lock/unlock one hash chain, non-blocking. This is meant to be used
to reduce contention - it cannot guarantee how many records will be
locked */
int tdb_chainlock_nonblock(struct tdb_context *tdb, TDB_DATA key)
{
- return chainlock_loop(tdb, &key, F_WRLCK, TDB_LOCK_NOWAIT,
- "tdb_chainlock_nonblock");
-}
-
-int tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key)
-{
- uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
- tdb_trace_1rec(tdb, "tdb_chainunlock", key);
- return tdb_unlock_list(tdb, h & ((1ULL << tdb->header.v.hash_bits)-1),
- F_WRLCK);
+ return chainlock(tdb, &key, F_WRLCK, TDB_LOCK_NOWAIT,
+ "tdb_chainlock_nonblock"); /* write lock requested with
+   * TDB_LOCK_NOWAIT.  NOTE(review): this definition now follows the
+   * added "#if 0", so it is presumably compiled out -- confirm */
}
int tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key)
{
- return chainlock_loop(tdb, &key, F_RDLCK, TDB_LOCK_WAIT,
- "tdb_chainlock_read");
+ return chainlock(tdb, &key, F_RDLCK, TDB_LOCK_WAIT,
+ "tdb_chainlock_read"); /* read lock (F_RDLCK) on key's hash
+   * chain, requested with TDB_LOCK_WAIT; result is chainlock()'s 0 / -1 */
}
int tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key)