git.ozlabs.org Git - ccan/blob - ccan/tdb2/lock.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library
   5
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 3 of the License, or (at your option) any later version.
  18
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, see <http://www.gnu.org/licenses/>.
  26 */
  27
  28 #include "private.h"
  29 #include <assert.h>
  30 #include <ccan/build_assert/build_assert.h>
  31
  32 /* If we were threaded, we could wait for unlock, but we're not, so fail. */
  33 static bool owner_conflict(struct tdb_context *tdb, struct tdb_lock *lock)
  34 {
  35         if (lock->owner != tdb) {
  36                 tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
  37                            "Lock already owned by another opener");
  38                 return true;
  39         }
  40         return false;
  41 }
  42
  43 static int fcntl_lock(struct tdb_context *tdb,
  44                       int rw, off_t off, off_t len, bool waitflag)
  45 {
  46         struct flock fl;
  47
  48         fl.l_type = rw;
  49         fl.l_whence = SEEK_SET;
  50         fl.l_start = off;
  51         fl.l_len = len;
  52         fl.l_pid = 0;
  53
  54         add_stat(tdb, lock_lowlevel, 1);
  55         if (waitflag)
  56                 return fcntl(tdb->file->fd, F_SETLKW, &fl);
  57         else {
  58                 add_stat(tdb, lock_nonblock, 1);
  59                 return fcntl(tdb->file->fd, F_SETLK, &fl);
  60         }
  61 }
  62
  63 static int fcntl_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len)
  64 {
  65         struct flock fl;
  66 #if 0 /* Check they matched up locks and unlocks correctly. */
  67         char line[80];
  68         FILE *locks;
  69         bool found = false;
  70
  71         locks = fopen("/proc/locks", "r");
  72
  73         while (fgets(line, 80, locks)) {
  74                 char *p;
  75                 int type, start, l;
  76
  77                 /* eg. 1: FLOCK  ADVISORY  WRITE 2440 08:01:2180826 0 EOF */
  78                 p = strchr(line, ':') + 1;
  79                 if (strncmp(p, " POSIX  ADVISORY  ", strlen(" POSIX  ADVISORY  ")))
  80                         continue;
  81                 p += strlen(" FLOCK  ADVISORY  ");
  82                 if (strncmp(p, "READ  ", strlen("READ  ")) == 0)
  83                         type = F_RDLCK;
  84                 else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0)
  85                         type = F_WRLCK;
  86                 else
  87                         abort();
  88                 p += 6;
  89                 if (atoi(p) != getpid())
  90                         continue;
  91                 p = strchr(strchr(p, ' ') + 1, ' ') + 1;
  92                 start = atoi(p);
  93                 p = strchr(p, ' ') + 1;
  94                 if (strncmp(p, "EOF", 3) == 0)
  95                         l = 0;
  96                 else
  97                         l = atoi(p) - start + 1;
  98
  99                 if (off == start) {
 100                         if (len != l) {
 101                                 fprintf(stderr, "Len %u should be %u: %s",
 102                                         (int)len, l, line);
 103                                 abort();
 104                         }
 105                         if (type != rw) {
 106                                 fprintf(stderr, "Type %s wrong: %s",
 107                                         rw == F_RDLCK ? "READ" : "WRITE", line);
 108                                 abort();
 109                         }
 110                         found = true;
 111                         break;
 112                 }
 113         }
 114
 115         if (!found) {
 116                 fprintf(stderr, "Unlock on %u@%u not found!",
 117                         (int)off, (int)len);
 118                 abort();
 119         }
 120
 121         fclose(locks);
 122 #endif
 123
 124         fl.l_type = F_UNLCK;
 125         fl.l_whence = SEEK_SET;
 126         fl.l_start = off;
 127         fl.l_len = len;
 128         fl.l_pid = 0;
 129
 130         return fcntl(tdb->file->fd, F_SETLKW, &fl);
 131 }
 132
 133 /* a byte range locking function - return 0 on success
 134    this functions locks len bytes at the specified offset.
 135
 136    note that a len of zero means lock to end of file
 137 */
 138 static enum TDB_ERROR tdb_brlock(struct tdb_context *tdb,
 139                                  int rw_type, tdb_off_t offset, tdb_off_t len,
 140                                  enum tdb_lock_flags flags)
 141 {
 142         int ret;
 143
 144         if (tdb->flags & TDB_NOLOCK) {
 145                 return TDB_SUCCESS;
 146         }
 147
 148         if (rw_type == F_WRLCK && tdb->read_only) {
 149                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
 150                                   "Write lock attempted on read-only database");
 151         }
 152
 153         /* A 32 bit system cannot open a 64-bit file, but it could have
 154          * expanded since then: check here. */
 155         if ((size_t)(offset + len) != offset + len) {
 156                 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
 157                                   "tdb_brlock: lock on giant offset %llu",
 158                                   (long long)(offset + len));
 159         }
 160
 161         do {
 162                 ret = fcntl_lock(tdb, rw_type, offset, len,
 163                                  flags & TDB_LOCK_WAIT);
 164         } while (ret == -1 && errno == EINTR);
 165
 166         if (ret == -1) {
 167                 /* Generic lock error. errno set by fcntl.
 168                  * EAGAIN is an expected return from non-blocking
 169                  * locks. */
 170                 if (!(flags & TDB_LOCK_PROBE) && errno != EAGAIN) {
 171                         tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 172                                    "tdb_brlock failed (fd=%d) at"
 173                                    " offset %zu rw_type=%d flags=%d len=%zu:"
 174                                    " %s",
 175                                    tdb->file->fd, (size_t)offset, rw_type,
 176                                    flags, (size_t)len, strerror(errno));
 177                 }
 178                 return TDB_ERR_LOCK;
 179         }
 180         return TDB_SUCCESS;
 181 }
 182
 183 static enum TDB_ERROR tdb_brunlock(struct tdb_context *tdb,
 184                                    int rw_type, tdb_off_t offset, size_t len)
 185 {
 186         int ret;
 187
 188         if (tdb->flags & TDB_NOLOCK) {
 189                 return TDB_SUCCESS;
 190         }
 191
 192         do {
 193                 ret = fcntl_unlock(tdb, rw_type, offset, len);
 194         } while (ret == -1 && errno == EINTR);
 195
 196         if (ret == -1) {
 197                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 198                                   "tdb_brunlock failed (fd=%d) at offset %zu"
 199                                   " rw_type=%d len=%zu",
 200                                   tdb->file->fd, (size_t)offset, rw_type,
 201                                   (size_t)len);
 202         }
 203         return TDB_SUCCESS;
 204 }
 205
 206 /*
 207   upgrade a read lock to a write lock. This needs to be handled in a
 208   special way as some OSes (such as solaris) have too conservative
 209   deadlock detection and claim a deadlock when progress can be
 210   made. For those OSes we may loop for a while.
 211 */
 212 enum TDB_ERROR tdb_allrecord_upgrade(struct tdb_context *tdb)
 213 {
 214         int count = 1000;
 215
 216         if (tdb->file->allrecord_lock.count != 1) {
 217                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 218                                   "tdb_allrecord_upgrade failed:"
 219                                   " count %u too high",
 220                                   tdb->file->allrecord_lock.count);
 221         }
 222
 223         if (tdb->file->allrecord_lock.off != 1) {
 224                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 225                                   "tdb_allrecord_upgrade failed:"
 226                                   " already upgraded?");
 227         }
 228
 229         while (count--) {
 230                 struct timeval tv;
 231                 if (tdb_brlock(tdb, F_WRLCK,
 232                                TDB_HASH_LOCK_START, 0,
 233                                TDB_LOCK_WAIT|TDB_LOCK_PROBE) == TDB_SUCCESS) {
 234                         tdb->file->allrecord_lock.ltype = F_WRLCK;
 235                         tdb->file->allrecord_lock.off = 0;
 236                         return TDB_SUCCESS;
 237                 }
 238                 if (errno != EDEADLK) {
 239                         break;
 240                 }
 241                 /* sleep for as short a time as we can - more portable than usleep() */
 242                 tv.tv_sec = 0;
 243                 tv.tv_usec = 1;
 244                 select(0, NULL, NULL, NULL, &tv);
 245         }
 246         return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 247                           "tdb_allrecord_upgrade failed");
 248 }
 249
 250 static struct tdb_lock *find_nestlock(struct tdb_context *tdb, tdb_off_t offset,
 251                                       const struct tdb_context *owner)
 252 {
 253         unsigned int i;
 254
 255         for (i=0; i<tdb->file->num_lockrecs; i++) {
 256                 if (tdb->file->lockrecs[i].off == offset) {
 257                         if (owner && tdb->file->lockrecs[i].owner != owner)
 258                                 return NULL;
 259                         return &tdb->file->lockrecs[i];
 260                 }
 261         }
 262         return NULL;
 263 }
 264
 265 enum TDB_ERROR tdb_lock_and_recover(struct tdb_context *tdb)
 266 {
 267         enum TDB_ERROR ecode;
 268
 269         ecode = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK,
 270                                    false);
 271         if (ecode != TDB_SUCCESS) {
 272                 return ecode;
 273         }
 274
 275         ecode = tdb_lock_open(tdb, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
 276         if (ecode != TDB_SUCCESS) {
 277                 tdb_allrecord_unlock(tdb, F_WRLCK);
 278                 return ecode;
 279         }
 280         ecode = tdb_transaction_recover(tdb);
 281         tdb_unlock_open(tdb);
 282         tdb_allrecord_unlock(tdb, F_WRLCK);
 283
 284         return ecode;
 285 }
 286
 287 /* lock an offset in the database. */
 288 static enum TDB_ERROR tdb_nest_lock(struct tdb_context *tdb,
 289                                     tdb_off_t offset, int ltype,
 290                                     enum tdb_lock_flags flags)
 291 {
 292         struct tdb_lock *new_lck;
 293         enum TDB_ERROR ecode;
 294
 295         if (offset > (TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE
 296                       + tdb->file->map_size / 8)) {
 297                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 298                                   "tdb_nest_lock: invalid offset %zu ltype=%d",
 299                                   (size_t)offset, ltype);
 300         }
 301
 302         if (tdb->flags & TDB_NOLOCK)
 303                 return TDB_SUCCESS;
 304
 305         add_stat(tdb, locks, 1);
 306
 307         new_lck = find_nestlock(tdb, offset, NULL);
 308         if (new_lck) {
 309                 if (owner_conflict(tdb, new_lck))
 310                         return -1;
 311
 312                 if (new_lck->ltype == F_RDLCK && ltype == F_WRLCK) {
 313                         return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 314                                           "tdb_nest_lock:"
 315                                           " offset %zu has read lock",
 316                                           (size_t)offset);
 317                 }
 318                 /* Just increment the struct, posix locks don't stack. */
 319                 new_lck->count++;
 320                 return TDB_SUCCESS;
 321         }
 322
 323         if (tdb->file->num_lockrecs
 324             && offset >= TDB_HASH_LOCK_START
 325             && offset < TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE) {
 326                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 327                                   "tdb_nest_lock: already have a hash lock?");
 328         }
 329
 330         new_lck = (struct tdb_lock *)realloc(
 331                 tdb->file->lockrecs,
 332                 sizeof(*tdb->file->lockrecs) * (tdb->file->num_lockrecs+1));
 333         if (new_lck == NULL) {
 334                 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
 335                                   "tdb_nest_lock:"
 336                                   " unable to allocate %zu lock struct",
 337                                   tdb->file->num_lockrecs + 1);
 338         }
 339         tdb->file->lockrecs = new_lck;
 340
 341         /* Since fcntl locks don't nest, we do a lock for the first one,
 342            and simply bump the count for future ones */
 343         ecode = tdb_brlock(tdb, ltype, offset, 1, flags);
 344         if (ecode != TDB_SUCCESS) {
 345                 return ecode;
 346         }
 347
 348         /* First time we grab a lock, perhaps someone died in commit? */
 349         if (!(flags & TDB_LOCK_NOCHECK)
 350             && tdb->file->num_lockrecs == 0) {
 351                 tdb_bool_err berr = tdb_needs_recovery(tdb);
 352                 if (berr != false) {
 353                         tdb_brunlock(tdb, ltype, offset, 1);
 354
 355                         if (berr < 0)
 356                                 return berr;
 357                         ecode = tdb_lock_and_recover(tdb);
 358                         if (ecode == TDB_SUCCESS) {
 359                                 ecode = tdb_brlock(tdb, ltype, offset, 1,
 360                                                    flags);
 361                         }
 362                         if (ecode != TDB_SUCCESS) {
 363                                 return ecode;
 364                         }
 365                 }
 366         }
 367
 368         tdb->file->lockrecs[tdb->file->num_lockrecs].owner = tdb;
 369         tdb->file->lockrecs[tdb->file->num_lockrecs].off = offset;
 370         tdb->file->lockrecs[tdb->file->num_lockrecs].count = 1;
 371         tdb->file->lockrecs[tdb->file->num_lockrecs].ltype = ltype;
 372         tdb->file->num_lockrecs++;
 373
 374         return TDB_SUCCESS;
 375 }
 376
 377 static enum TDB_ERROR tdb_nest_unlock(struct tdb_context *tdb,
 378                                       tdb_off_t off, int ltype)
 379 {
 380         struct tdb_lock *lck;
 381         enum TDB_ERROR ecode;
 382
 383         if (tdb->flags & TDB_NOLOCK)
 384                 return TDB_SUCCESS;
 385
 386         lck = find_nestlock(tdb, off, tdb);
 387         if ((lck == NULL) || (lck->count == 0)) {
 388                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 389                                   "tdb_nest_unlock: no lock for %zu",
 390                                   (size_t)off);
 391         }
 392
 393         if (lck->count > 1) {
 394                 lck->count--;
 395                 return TDB_SUCCESS;
 396         }
 397
 398         /*
 399          * This lock has count==1 left, so we need to unlock it in the
 400          * kernel. We don't bother with decrementing the in-memory array
 401          * element, we're about to overwrite it with the last array element
 402          * anyway.
 403          */
 404         ecode = tdb_brunlock(tdb, ltype, off, 1);
 405
 406         /*
 407          * Shrink the array by overwriting the element just unlocked with the
 408          * last array element.
 409          */
 410         *lck = tdb->file->lockrecs[--tdb->file->num_lockrecs];
 411
 412         return ecode;
 413 }
 414
 415 /*
 416   get the transaction lock
 417  */
 418 enum TDB_ERROR tdb_transaction_lock(struct tdb_context *tdb, int ltype)
 419 {
 420         return tdb_nest_lock(tdb, TDB_TRANSACTION_LOCK, ltype, TDB_LOCK_WAIT);
 421 }
 422
 423 /*
 424   release the transaction lock
 425  */
 426 void tdb_transaction_unlock(struct tdb_context *tdb, int ltype)
 427 {
 428         tdb_nest_unlock(tdb, TDB_TRANSACTION_LOCK, ltype);
 429 }
 430
 431 /* We only need to lock individual bytes, but Linux merges consecutive locks
 432  * so we lock in contiguous ranges. */
 433 static enum TDB_ERROR tdb_lock_gradual(struct tdb_context *tdb,
 434                                        int ltype, enum tdb_lock_flags flags,
 435                                        tdb_off_t off, tdb_off_t len)
 436 {
 437         enum TDB_ERROR ecode;
 438         enum tdb_lock_flags nb_flags = (flags & ~TDB_LOCK_WAIT);
 439
 440         if (len <= 1) {
 441                 /* 0 would mean to end-of-file... */
 442                 assert(len != 0);
 443                 /* Single hash.  Just do blocking lock. */
 444                 return tdb_brlock(tdb, ltype, off, len, flags);
 445         }
 446
 447         /* First we try non-blocking. */
 448         if (tdb_brlock(tdb, ltype, off, len, nb_flags) == TDB_SUCCESS) {
 449                 return TDB_SUCCESS;
 450         }
 451
 452         /* Try locking first half, then second. */
 453         ecode = tdb_lock_gradual(tdb, ltype, flags, off, len / 2);
 454         if (ecode != TDB_SUCCESS)
 455                 return ecode;
 456
 457         ecode = tdb_lock_gradual(tdb, ltype, flags,
 458                                  off + len / 2, len - len / 2);
 459         if (ecode != TDB_SUCCESS) {
 460                 tdb_brunlock(tdb, ltype, off, len / 2);
 461         }
 462         return ecode;
 463 }
 464
 465 /* lock/unlock entire database.  It can only be upgradable if you have some
 466  * other way of guaranteeing exclusivity (ie. transaction write lock). */
 467 enum TDB_ERROR tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
 468                                   enum tdb_lock_flags flags, bool upgradable)
 469 {
 470         enum TDB_ERROR ecode;
 471         tdb_bool_err berr;
 472
 473         if (tdb->file->allrecord_lock.count
 474             && (ltype == F_RDLCK
 475                 || tdb->file->allrecord_lock.ltype == F_WRLCK)) {
 476                 tdb->file->allrecord_lock.count++;
 477                 return TDB_SUCCESS;
 478         }
 479
 480         if (tdb->file->allrecord_lock.count) {
 481                 /* a global lock of a different type exists */
 482                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
 483                                   "tdb_allrecord_lock: already have %s lock",
 484                                   tdb->file->allrecord_lock.ltype == F_RDLCK
 485                                   ? "read" : "write");
 486         }
 487
 488         if (tdb_has_hash_locks(tdb)) {
 489                 /* can't combine global and chain locks */
 490                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
 491                                   "tdb_allrecord_lock:"
 492                                   " already have chain lock");
 493         }
 494
 495         if (upgradable && ltype != F_RDLCK) {
 496                 /* tdb error: you can't upgrade a write lock! */
 497                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 498                                   "tdb_allrecord_lock:"
 499                                   " can't upgrade a write lock");
 500         }
 501
 502         add_stat(tdb, locks, 1);
 503 again:
 504         /* Lock hashes, gradually. */
 505         ecode = tdb_lock_gradual(tdb, ltype, flags, TDB_HASH_LOCK_START,
 506                                  TDB_HASH_LOCK_RANGE);
 507         if (ecode != TDB_SUCCESS) {
 508                 if (!(flags & TDB_LOCK_PROBE)) {
 509                         tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
 510                                    "tdb_allrecord_lock hashes failed");
 511                 }
 512                 return ecode;
 513         }
 514
 515         /* Lock free tables: there to end of file. */
 516         ecode = tdb_brlock(tdb, ltype,
 517                            TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE,
 518                            0, flags);
 519         if (ecode != TDB_SUCCESS) {
 520                 if (!(flags & TDB_LOCK_PROBE)) {
 521                         tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
 522                                  "tdb_allrecord_lock freetables failed");
 523                 }
 524                 tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START,
 525                              TDB_HASH_LOCK_RANGE);
 526                 return ecode;
 527         }
 528
 529         tdb->file->allrecord_lock.owner = tdb;
 530         tdb->file->allrecord_lock.count = 1;
 531         /* If it's upgradable, it's actually exclusive so we can treat
 532          * it as a write lock. */
 533         tdb->file->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
 534         tdb->file->allrecord_lock.off = upgradable;
 535
 536         /* Now check for needing recovery. */
 537         if (flags & TDB_LOCK_NOCHECK)
 538                 return TDB_SUCCESS;
 539
 540         berr = tdb_needs_recovery(tdb);
 541         if (likely(berr == false))
 542                 return TDB_SUCCESS;
 543
 544         tdb_allrecord_unlock(tdb, ltype);
 545         if (berr < 0)
 546                 return berr;
 547         ecode = tdb_lock_and_recover(tdb);
 548         if (ecode != TDB_SUCCESS) {
 549                 return ecode;
 550         }
 551         goto again;
 552 }
 553
 554 enum TDB_ERROR tdb_lock_open(struct tdb_context *tdb, enum tdb_lock_flags flags)
 555 {
 556         return tdb_nest_lock(tdb, TDB_OPEN_LOCK, F_WRLCK, flags);
 557 }
 558
 559 void tdb_unlock_open(struct tdb_context *tdb)
 560 {
 561         tdb_nest_unlock(tdb, TDB_OPEN_LOCK, F_WRLCK);
 562 }
 563
 564 bool tdb_has_open_lock(struct tdb_context *tdb)
 565 {
 566         return !(tdb->flags & TDB_NOLOCK)
 567                 && find_nestlock(tdb, TDB_OPEN_LOCK, tdb) != NULL;
 568 }
 569
 570 enum TDB_ERROR tdb_lock_expand(struct tdb_context *tdb, int ltype)
 571 {
 572         /* Lock doesn't protect data, so don't check (we recurse if we do!) */
 573         return tdb_nest_lock(tdb, TDB_EXPANSION_LOCK, ltype,
 574                              TDB_LOCK_WAIT | TDB_LOCK_NOCHECK);
 575 }
 576
 577 void tdb_unlock_expand(struct tdb_context *tdb, int ltype)
 578 {
 579         tdb_nest_unlock(tdb, TDB_EXPANSION_LOCK, ltype);
 580 }
 581
 582 /* unlock entire db */
 583 void tdb_allrecord_unlock(struct tdb_context *tdb, int ltype)
 584 {
 585         if (tdb->file->allrecord_lock.count == 0) {
 586                 tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
 587                            "tdb_allrecord_unlock: not locked!");
 588                 return;
 589         }
 590
 591         if (tdb->file->allrecord_lock.owner != tdb) {
 592                 tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
 593                            "tdb_allrecord_unlock: not locked by us!");
 594                 return;
 595         }
 596
 597         /* Upgradable locks are marked as write locks. */
 598         if (tdb->file->allrecord_lock.ltype != ltype
 599             && (!tdb->file->allrecord_lock.off || ltype != F_RDLCK)) {
 600                 tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 601                            "tdb_allrecord_unlock: have %s lock",
 602                            tdb->file->allrecord_lock.ltype == F_RDLCK
 603                            ? "read" : "write");
 604                 return;
 605         }
 606
 607         if (tdb->file->allrecord_lock.count > 1) {
 608                 tdb->file->allrecord_lock.count--;
 609                 return;
 610         }
 611
 612         tdb->file->allrecord_lock.count = 0;
 613         tdb->file->allrecord_lock.ltype = 0;
 614
 615         tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, 0);
 616 }
 617
 618 bool tdb_has_expansion_lock(struct tdb_context *tdb)
 619 {
 620         return find_nestlock(tdb, TDB_EXPANSION_LOCK, tdb) != NULL;
 621 }
 622
 623 bool tdb_has_hash_locks(struct tdb_context *tdb)
 624 {
 625         unsigned int i;
 626
 627         for (i=0; i<tdb->file->num_lockrecs; i++) {
 628                 if (tdb->file->lockrecs[i].off >= TDB_HASH_LOCK_START
 629                     && tdb->file->lockrecs[i].off < (TDB_HASH_LOCK_START
 630                                                      + TDB_HASH_LOCK_RANGE))
 631                         return true;
 632         }
 633         return false;
 634 }
 635
 636 static bool tdb_has_free_lock(struct tdb_context *tdb)
 637 {
 638         unsigned int i;
 639
 640         if (tdb->flags & TDB_NOLOCK)
 641                 return false;
 642
 643         for (i=0; i<tdb->file->num_lockrecs; i++) {
 644                 if (tdb->file->lockrecs[i].off
 645                     > TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE)
 646                         return true;
 647         }
 648         return false;
 649 }
 650
 651 enum TDB_ERROR tdb_lock_hashes(struct tdb_context *tdb,
 652                                tdb_off_t hash_lock,
 653                                tdb_len_t hash_range,
 654                                int ltype, enum tdb_lock_flags waitflag)
 655 {
 656         /* FIXME: Do this properly, using hlock_range */
 657         unsigned lock = TDB_HASH_LOCK_START
 658                 + (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));
 659
 660         /* a allrecord lock allows us to avoid per chain locks */
 661         if (tdb->file->allrecord_lock.count &&
 662             (ltype == tdb->file->allrecord_lock.ltype || ltype == F_RDLCK)) {
 663                 return TDB_SUCCESS;
 664         }
 665
 666         if (tdb->file->allrecord_lock.count) {
 667                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
 668                                   "tdb_lock_hashes:"
 669                                   " already have %s allrecordlock",
 670                                   tdb->file->allrecord_lock.ltype == F_RDLCK
 671                                   ? "read" : "write");
 672         }
 673
 674         if (tdb_has_free_lock(tdb)) {
 675                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 676                                   "tdb_lock_hashes: already have free lock");
 677         }
 678
 679         if (tdb_has_expansion_lock(tdb)) {
 680                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 681                                   "tdb_lock_hashes:"
 682                                   " already have expansion lock");
 683         }
 684
 685         return tdb_nest_lock(tdb, lock, ltype, waitflag);
 686 }
 687
 688 enum TDB_ERROR tdb_unlock_hashes(struct tdb_context *tdb,
 689                                  tdb_off_t hash_lock,
 690                                  tdb_len_t hash_range, int ltype)
 691 {
 692         unsigned lock = TDB_HASH_LOCK_START
 693                 + (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));
 694
 695         if (tdb->flags & TDB_NOLOCK)
 696                 return 0;
 697
 698         /* a allrecord lock allows us to avoid per chain locks */
 699         if (tdb->file->allrecord_lock.count) {
 700                 if (tdb->file->allrecord_lock.ltype == F_RDLCK
 701                     && ltype == F_WRLCK) {
 702                         return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 703                                           "tdb_unlock_hashes RO allrecord!");
 704                 }
 705                 return TDB_SUCCESS;
 706         }
 707
 708         return tdb_nest_unlock(tdb, lock, ltype);
 709 }
 710
 711 /* Hash locks use TDB_HASH_LOCK_START + the next 30 bits.
 712  * Then we begin; bucket offsets are sizeof(tdb_len_t) apart, so we divide.
 713  * The result is that on 32 bit systems we don't use lock values > 2^31 on
 714  * files that are less than 4GB.
 715  */
 716 static tdb_off_t free_lock_off(tdb_off_t b_off)
 717 {
 718         return TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE
 719                 + b_off / sizeof(tdb_off_t);
 720 }
 721
 722 enum TDB_ERROR tdb_lock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off,
 723                                     enum tdb_lock_flags waitflag)
 724 {
 725         assert(b_off >= sizeof(struct tdb_header));
 726
 727         if (tdb->flags & TDB_NOLOCK)
 728                 return 0;
 729
 730         /* a allrecord lock allows us to avoid per chain locks */
 731         if (tdb->file->allrecord_lock.count) {
 732                 if (tdb->file->allrecord_lock.ltype == F_WRLCK)
 733                         return 0;
 734                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 735                                   "tdb_lock_free_bucket with"
 736                                   " read-only allrecordlock!");
 737         }
 738
 739 #if 0 /* FIXME */
 740         if (tdb_has_expansion_lock(tdb)) {
 741                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 742                                   "tdb_lock_free_bucket:"
 743                                   " already have expansion lock");
 744         }
 745 #endif
 746
 747         return tdb_nest_lock(tdb, free_lock_off(b_off), F_WRLCK, waitflag);
 748 }
 749
 750 void tdb_unlock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off)
 751 {
 752         if (tdb->file->allrecord_lock.count)
 753                 return;
 754
 755         tdb_nest_unlock(tdb, free_lock_off(b_off), F_WRLCK);
 756 }
 757
 758 void tdb_unlock_all(struct tdb_context *tdb)
 759 {
 760         unsigned int i;
 761
 762         while (tdb->file->allrecord_lock.count
 763                && tdb->file->allrecord_lock.owner == tdb) {
 764                 tdb_allrecord_unlock(tdb, tdb->file->allrecord_lock.ltype);
 765         }
 766
 767         for (i=0; i<tdb->file->num_lockrecs; i++) {
 768                 if (tdb->file->lockrecs[i].owner == tdb) {
 769                         tdb_nest_unlock(tdb,
 770                                         tdb->file->lockrecs[i].off,
 771                                         tdb->file->lockrecs[i].ltype);
 772                         i--;
 773                 }
 774         }
 775 }