git.ozlabs.org Git - ccan/blob - ccan/tdb2/lock.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library
   5
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 3 of the License, or (at your option) any later version.
  18
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, see <http://www.gnu.org/licenses/>.
  26 */
  27
  28 #include "private.h"
  29 #include <assert.h>
  30 #include <ccan/build_assert/build_assert.h>
  31
  32 /* If we were threaded, we could wait for unlock, but we're not, so fail. */
  33 static enum TDB_ERROR owner_conflict(struct tdb_context *tdb, const char *call)
  34 {
  35         return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
  36                           "%s: lock owned by another tdb in this process.",
  37                           call);
  38 }
  39
  40 static int fcntl_lock(struct tdb_context *tdb,
  41                       int rw, off_t off, off_t len, bool waitflag)
  42 {
  43         struct flock fl;
  44
  45         fl.l_type = rw;
  46         fl.l_whence = SEEK_SET;
  47         fl.l_start = off;
  48         fl.l_len = len;
  49         fl.l_pid = 0;
  50
  51         add_stat(tdb, lock_lowlevel, 1);
  52         if (waitflag)
  53                 return fcntl(tdb->file->fd, F_SETLKW, &fl);
  54         else {
  55                 add_stat(tdb, lock_nonblock, 1);
  56                 return fcntl(tdb->file->fd, F_SETLK, &fl);
  57         }
  58 }
  59
  60 static int fcntl_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len)
  61 {
  62         struct flock fl;
  63 #if 0 /* Check they matched up locks and unlocks correctly. */
  64         char line[80];
  65         FILE *locks;
  66         bool found = false;
  67
  68         locks = fopen("/proc/locks", "r");
  69
  70         while (fgets(line, 80, locks)) {
  71                 char *p;
  72                 int type, start, l;
  73
  74                 /* eg. 1: FLOCK  ADVISORY  WRITE 2440 08:01:2180826 0 EOF */
  75                 p = strchr(line, ':') + 1;
  76                 if (strncmp(p, " POSIX  ADVISORY  ", strlen(" POSIX  ADVISORY  ")))
  77                         continue;
  78                 p += strlen(" FLOCK  ADVISORY  ");
  79                 if (strncmp(p, "READ  ", strlen("READ  ")) == 0)
  80                         type = F_RDLCK;
  81                 else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0)
  82                         type = F_WRLCK;
  83                 else
  84                         abort();
  85                 p += 6;
  86                 if (atoi(p) != getpid())
  87                         continue;
  88                 p = strchr(strchr(p, ' ') + 1, ' ') + 1;
  89                 start = atoi(p);
  90                 p = strchr(p, ' ') + 1;
  91                 if (strncmp(p, "EOF", 3) == 0)
  92                         l = 0;
  93                 else
  94                         l = atoi(p) - start + 1;
  95
  96                 if (off == start) {
  97                         if (len != l) {
  98                                 fprintf(stderr, "Len %u should be %u: %s",
  99                                         (int)len, l, line);
 100                                 abort();
 101                         }
 102                         if (type != rw) {
 103                                 fprintf(stderr, "Type %s wrong: %s",
 104                                         rw == F_RDLCK ? "READ" : "WRITE", line);
 105                                 abort();
 106                         }
 107                         found = true;
 108                         break;
 109                 }
 110         }
 111
 112         if (!found) {
 113                 fprintf(stderr, "Unlock on %u@%u not found!",
 114                         (int)off, (int)len);
 115                 abort();
 116         }
 117
 118         fclose(locks);
 119 #endif
 120
 121         fl.l_type = F_UNLCK;
 122         fl.l_whence = SEEK_SET;
 123         fl.l_start = off;
 124         fl.l_len = len;
 125         fl.l_pid = 0;
 126
 127         return fcntl(tdb->file->fd, F_SETLKW, &fl);
 128 }
 129
 130 /* a byte range locking function - return 0 on success
 131    this functions locks len bytes at the specified offset.
 132
 133    note that a len of zero means lock to end of file
 134 */
 135 static enum TDB_ERROR tdb_brlock(struct tdb_context *tdb,
 136                                  int rw_type, tdb_off_t offset, tdb_off_t len,
 137                                  enum tdb_lock_flags flags)
 138 {
 139         int ret;
 140
 141         if (tdb->flags & TDB_NOLOCK) {
 142                 return TDB_SUCCESS;
 143         }
 144
 145         if (rw_type == F_WRLCK && tdb->read_only) {
 146                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
 147                                   "Write lock attempted on read-only database");
 148         }
 149
 150         /* A 32 bit system cannot open a 64-bit file, but it could have
 151          * expanded since then: check here. */
 152         if ((size_t)(offset + len) != offset + len) {
 153                 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
 154                                   "tdb_brlock: lock on giant offset %llu",
 155                                   (long long)(offset + len));
 156         }
 157
 158         do {
 159                 ret = fcntl_lock(tdb, rw_type, offset, len,
 160                                  flags & TDB_LOCK_WAIT);
 161         } while (ret == -1 && errno == EINTR);
 162
 163         if (ret == -1) {
 164                 /* Generic lock error. errno set by fcntl.
 165                  * EAGAIN is an expected return from non-blocking
 166                  * locks. */
 167                 if (!(flags & TDB_LOCK_PROBE) && errno != EAGAIN) {
 168                         tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 169                                    "tdb_brlock failed (fd=%d) at"
 170                                    " offset %zu rw_type=%d flags=%d len=%zu:"
 171                                    " %s",
 172                                    tdb->file->fd, (size_t)offset, rw_type,
 173                                    flags, (size_t)len, strerror(errno));
 174                 }
 175                 return TDB_ERR_LOCK;
 176         }
 177         return TDB_SUCCESS;
 178 }
 179
 180 static enum TDB_ERROR tdb_brunlock(struct tdb_context *tdb,
 181                                    int rw_type, tdb_off_t offset, size_t len)
 182 {
 183         int ret;
 184
 185         if (tdb->flags & TDB_NOLOCK) {
 186                 return TDB_SUCCESS;
 187         }
 188
 189         do {
 190                 ret = fcntl_unlock(tdb, rw_type, offset, len);
 191         } while (ret == -1 && errno == EINTR);
 192
 193         if (ret == -1) {
 194                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 195                                   "tdb_brunlock failed (fd=%d) at offset %zu"
 196                                   " rw_type=%d len=%zu",
 197                                   tdb->file->fd, (size_t)offset, rw_type,
 198                                   (size_t)len);
 199         }
 200         return TDB_SUCCESS;
 201 }
 202
 203 /*
 204   upgrade a read lock to a write lock. This needs to be handled in a
 205   special way as some OSes (such as solaris) have too conservative
 206   deadlock detection and claim a deadlock when progress can be
 207   made. For those OSes we may loop for a while.
 208 */
 209 enum TDB_ERROR tdb_allrecord_upgrade(struct tdb_context *tdb)
 210 {
 211         int count = 1000;
 212
 213         if (tdb->file->allrecord_lock.count != 1) {
 214                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 215                                   "tdb_allrecord_upgrade failed:"
 216                                   " count %u too high",
 217                                   tdb->file->allrecord_lock.count);
 218         }
 219
 220         if (tdb->file->allrecord_lock.off != 1) {
 221                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 222                                   "tdb_allrecord_upgrade failed:"
 223                                   " already upgraded?");
 224         }
 225
 226         if (tdb->file->allrecord_lock.owner != tdb) {
 227                 return owner_conflict(tdb, "tdb_allrecord_upgrade");
 228         }
 229
 230         while (count--) {
 231                 struct timeval tv;
 232                 if (tdb_brlock(tdb, F_WRLCK,
 233                                TDB_HASH_LOCK_START, 0,
 234                                TDB_LOCK_WAIT|TDB_LOCK_PROBE) == TDB_SUCCESS) {
 235                         tdb->file->allrecord_lock.ltype = F_WRLCK;
 236                         tdb->file->allrecord_lock.off = 0;
 237                         return TDB_SUCCESS;
 238                 }
 239                 if (errno != EDEADLK) {
 240                         break;
 241                 }
 242                 /* sleep for as short a time as we can - more portable than usleep() */
 243                 tv.tv_sec = 0;
 244                 tv.tv_usec = 1;
 245                 select(0, NULL, NULL, NULL, &tv);
 246         }
 247         return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 248                           "tdb_allrecord_upgrade failed");
 249 }
 250
 251 static struct tdb_lock *find_nestlock(struct tdb_context *tdb, tdb_off_t offset,
 252                                       const struct tdb_context *owner)
 253 {
 254         unsigned int i;
 255
 256         for (i=0; i<tdb->file->num_lockrecs; i++) {
 257                 if (tdb->file->lockrecs[i].off == offset) {
 258                         if (owner && tdb->file->lockrecs[i].owner != owner)
 259                                 return NULL;
 260                         return &tdb->file->lockrecs[i];
 261                 }
 262         }
 263         return NULL;
 264 }
 265
 266 enum TDB_ERROR tdb_lock_and_recover(struct tdb_context *tdb)
 267 {
 268         enum TDB_ERROR ecode;
 269
 270         ecode = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK,
 271                                    false);
 272         if (ecode != TDB_SUCCESS) {
 273                 return ecode;
 274         }
 275
 276         ecode = tdb_lock_open(tdb, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
 277         if (ecode != TDB_SUCCESS) {
 278                 tdb_allrecord_unlock(tdb, F_WRLCK);
 279                 return ecode;
 280         }
 281         ecode = tdb_transaction_recover(tdb);
 282         tdb_unlock_open(tdb);
 283         tdb_allrecord_unlock(tdb, F_WRLCK);
 284
 285         return ecode;
 286 }
 287
 288 /* lock an offset in the database. */
 289 static enum TDB_ERROR tdb_nest_lock(struct tdb_context *tdb,
 290                                     tdb_off_t offset, int ltype,
 291                                     enum tdb_lock_flags flags)
 292 {
 293         struct tdb_lock *new_lck;
 294         enum TDB_ERROR ecode;
 295
 296         if (offset > (TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE
 297                       + tdb->file->map_size / 8)) {
 298                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 299                                   "tdb_nest_lock: invalid offset %zu ltype=%d",
 300                                   (size_t)offset, ltype);
 301         }
 302
 303         if (tdb->flags & TDB_NOLOCK)
 304                 return TDB_SUCCESS;
 305
 306         add_stat(tdb, locks, 1);
 307
 308         new_lck = find_nestlock(tdb, offset, NULL);
 309         if (new_lck) {
 310                 if (new_lck->owner != tdb) {
 311                         return owner_conflict(tdb, "tdb_nest_lock");
 312                 }
 313
 314                 if (new_lck->ltype == F_RDLCK && ltype == F_WRLCK) {
 315                         return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 316                                           "tdb_nest_lock:"
 317                                           " offset %zu has read lock",
 318                                           (size_t)offset);
 319                 }
 320                 /* Just increment the struct, posix locks don't stack. */
 321                 new_lck->count++;
 322                 return TDB_SUCCESS;
 323         }
 324
 325         if (tdb->file->num_lockrecs
 326             && offset >= TDB_HASH_LOCK_START
 327             && offset < TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE) {
 328                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 329                                   "tdb_nest_lock: already have a hash lock?");
 330         }
 331
 332         new_lck = (struct tdb_lock *)realloc(
 333                 tdb->file->lockrecs,
 334                 sizeof(*tdb->file->lockrecs) * (tdb->file->num_lockrecs+1));
 335         if (new_lck == NULL) {
 336                 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
 337                                   "tdb_nest_lock:"
 338                                   " unable to allocate %zu lock struct",
 339                                   tdb->file->num_lockrecs + 1);
 340         }
 341         tdb->file->lockrecs = new_lck;
 342
 343         /* Since fcntl locks don't nest, we do a lock for the first one,
 344            and simply bump the count for future ones */
 345         ecode = tdb_brlock(tdb, ltype, offset, 1, flags);
 346         if (ecode != TDB_SUCCESS) {
 347                 return ecode;
 348         }
 349
 350         /* First time we grab a lock, perhaps someone died in commit? */
 351         if (!(flags & TDB_LOCK_NOCHECK)
 352             && tdb->file->num_lockrecs == 0) {
 353                 tdb_bool_err berr = tdb_needs_recovery(tdb);
 354                 if (berr != false) {
 355                         tdb_brunlock(tdb, ltype, offset, 1);
 356
 357                         if (berr < 0)
 358                                 return berr;
 359                         ecode = tdb_lock_and_recover(tdb);
 360                         if (ecode == TDB_SUCCESS) {
 361                                 ecode = tdb_brlock(tdb, ltype, offset, 1,
 362                                                    flags);
 363                         }
 364                         if (ecode != TDB_SUCCESS) {
 365                                 return ecode;
 366                         }
 367                 }
 368         }
 369
 370         tdb->file->lockrecs[tdb->file->num_lockrecs].owner = tdb;
 371         tdb->file->lockrecs[tdb->file->num_lockrecs].off = offset;
 372         tdb->file->lockrecs[tdb->file->num_lockrecs].count = 1;
 373         tdb->file->lockrecs[tdb->file->num_lockrecs].ltype = ltype;
 374         tdb->file->num_lockrecs++;
 375
 376         return TDB_SUCCESS;
 377 }
 378
 379 static enum TDB_ERROR tdb_nest_unlock(struct tdb_context *tdb,
 380                                       tdb_off_t off, int ltype)
 381 {
 382         struct tdb_lock *lck;
 383         enum TDB_ERROR ecode;
 384
 385         if (tdb->flags & TDB_NOLOCK)
 386                 return TDB_SUCCESS;
 387
 388         lck = find_nestlock(tdb, off, tdb);
 389         if ((lck == NULL) || (lck->count == 0)) {
 390                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 391                                   "tdb_nest_unlock: no lock for %zu",
 392                                   (size_t)off);
 393         }
 394
 395         if (lck->count > 1) {
 396                 lck->count--;
 397                 return TDB_SUCCESS;
 398         }
 399
 400         /*
 401          * This lock has count==1 left, so we need to unlock it in the
 402          * kernel. We don't bother with decrementing the in-memory array
 403          * element, we're about to overwrite it with the last array element
 404          * anyway.
 405          */
 406         ecode = tdb_brunlock(tdb, ltype, off, 1);
 407
 408         /*
 409          * Shrink the array by overwriting the element just unlocked with the
 410          * last array element.
 411          */
 412         *lck = tdb->file->lockrecs[--tdb->file->num_lockrecs];
 413
 414         return ecode;
 415 }
 416
 417 /*
 418   get the transaction lock
 419  */
 420 enum TDB_ERROR tdb_transaction_lock(struct tdb_context *tdb, int ltype)
 421 {
 422         return tdb_nest_lock(tdb, TDB_TRANSACTION_LOCK, ltype, TDB_LOCK_WAIT);
 423 }
 424
 425 /*
 426   release the transaction lock
 427  */
 428 void tdb_transaction_unlock(struct tdb_context *tdb, int ltype)
 429 {
 430         tdb_nest_unlock(tdb, TDB_TRANSACTION_LOCK, ltype);
 431 }
 432
 433 /* We only need to lock individual bytes, but Linux merges consecutive locks
 434  * so we lock in contiguous ranges. */
 435 static enum TDB_ERROR tdb_lock_gradual(struct tdb_context *tdb,
 436                                        int ltype, enum tdb_lock_flags flags,
 437                                        tdb_off_t off, tdb_off_t len)
 438 {
 439         enum TDB_ERROR ecode;
 440         enum tdb_lock_flags nb_flags = (flags & ~TDB_LOCK_WAIT);
 441
 442         if (len <= 1) {
 443                 /* 0 would mean to end-of-file... */
 444                 assert(len != 0);
 445                 /* Single hash.  Just do blocking lock. */
 446                 return tdb_brlock(tdb, ltype, off, len, flags);
 447         }
 448
 449         /* First we try non-blocking. */
 450         if (tdb_brlock(tdb, ltype, off, len, nb_flags) == TDB_SUCCESS) {
 451                 return TDB_SUCCESS;
 452         }
 453
 454         /* Try locking first half, then second. */
 455         ecode = tdb_lock_gradual(tdb, ltype, flags, off, len / 2);
 456         if (ecode != TDB_SUCCESS)
 457                 return ecode;
 458
 459         ecode = tdb_lock_gradual(tdb, ltype, flags,
 460                                  off + len / 2, len - len / 2);
 461         if (ecode != TDB_SUCCESS) {
 462                 tdb_brunlock(tdb, ltype, off, len / 2);
 463         }
 464         return ecode;
 465 }
 466
 467 /* lock/unlock entire database.  It can only be upgradable if you have some
 468  * other way of guaranteeing exclusivity (ie. transaction write lock). */
 469 enum TDB_ERROR tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
 470                                   enum tdb_lock_flags flags, bool upgradable)
 471 {
 472         enum TDB_ERROR ecode;
 473         tdb_bool_err berr;
 474
 475         if (tdb->file->allrecord_lock.count) {
 476                 if (tdb->file->allrecord_lock.owner != tdb) {
 477                         return owner_conflict(tdb, "tdb_allrecord_lock");
 478                 }
 479
 480                 if (ltype == F_RDLCK
 481                     || tdb->file->allrecord_lock.ltype == F_WRLCK) {
 482                         tdb->file->allrecord_lock.count++;
 483                         return TDB_SUCCESS;
 484                 }
 485
 486                 /* a global lock of a different type exists */
 487                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
 488                                   "tdb_allrecord_lock: already have %s lock",
 489                                   tdb->file->allrecord_lock.ltype == F_RDLCK
 490                                   ? "read" : "write");
 491         }
 492
 493         if (tdb_has_hash_locks(tdb)) {
 494                 /* can't combine global and chain locks */
 495                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
 496                                   "tdb_allrecord_lock:"
 497                                   " already have chain lock");
 498         }
 499
 500         if (upgradable && ltype != F_RDLCK) {
 501                 /* tdb error: you can't upgrade a write lock! */
 502                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 503                                   "tdb_allrecord_lock:"
 504                                   " can't upgrade a write lock");
 505         }
 506
 507         add_stat(tdb, locks, 1);
 508 again:
 509         /* Lock hashes, gradually. */
 510         ecode = tdb_lock_gradual(tdb, ltype, flags, TDB_HASH_LOCK_START,
 511                                  TDB_HASH_LOCK_RANGE);
 512         if (ecode != TDB_SUCCESS) {
 513                 if (!(flags & TDB_LOCK_PROBE)) {
 514                         tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
 515                                    "tdb_allrecord_lock hashes failed");
 516                 }
 517                 return ecode;
 518         }
 519
 520         /* Lock free tables: there to end of file. */
 521         ecode = tdb_brlock(tdb, ltype,
 522                            TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE,
 523                            0, flags);
 524         if (ecode != TDB_SUCCESS) {
 525                 if (!(flags & TDB_LOCK_PROBE)) {
 526                         tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
 527                                  "tdb_allrecord_lock freetables failed");
 528                 }
 529                 tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START,
 530                              TDB_HASH_LOCK_RANGE);
 531                 return ecode;
 532         }
 533
 534         tdb->file->allrecord_lock.owner = tdb;
 535         tdb->file->allrecord_lock.count = 1;
 536         /* If it's upgradable, it's actually exclusive so we can treat
 537          * it as a write lock. */
 538         tdb->file->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
 539         tdb->file->allrecord_lock.off = upgradable;
 540
 541         /* Now check for needing recovery. */
 542         if (flags & TDB_LOCK_NOCHECK)
 543                 return TDB_SUCCESS;
 544
 545         berr = tdb_needs_recovery(tdb);
 546         if (likely(berr == false))
 547                 return TDB_SUCCESS;
 548
 549         tdb_allrecord_unlock(tdb, ltype);
 550         if (berr < 0)
 551                 return berr;
 552         ecode = tdb_lock_and_recover(tdb);
 553         if (ecode != TDB_SUCCESS) {
 554                 return ecode;
 555         }
 556         goto again;
 557 }
 558
 559 enum TDB_ERROR tdb_lock_open(struct tdb_context *tdb, enum tdb_lock_flags flags)
 560 {
 561         return tdb_nest_lock(tdb, TDB_OPEN_LOCK, F_WRLCK, flags);
 562 }
 563
 564 void tdb_unlock_open(struct tdb_context *tdb)
 565 {
 566         tdb_nest_unlock(tdb, TDB_OPEN_LOCK, F_WRLCK);
 567 }
 568
 569 bool tdb_has_open_lock(struct tdb_context *tdb)
 570 {
 571         return !(tdb->flags & TDB_NOLOCK)
 572                 && find_nestlock(tdb, TDB_OPEN_LOCK, tdb) != NULL;
 573 }
 574
 575 enum TDB_ERROR tdb_lock_expand(struct tdb_context *tdb, int ltype)
 576 {
 577         /* Lock doesn't protect data, so don't check (we recurse if we do!) */
 578         return tdb_nest_lock(tdb, TDB_EXPANSION_LOCK, ltype,
 579                              TDB_LOCK_WAIT | TDB_LOCK_NOCHECK);
 580 }
 581
 582 void tdb_unlock_expand(struct tdb_context *tdb, int ltype)
 583 {
 584         tdb_nest_unlock(tdb, TDB_EXPANSION_LOCK, ltype);
 585 }
 586
 587 /* unlock entire db */
 588 void tdb_allrecord_unlock(struct tdb_context *tdb, int ltype)
 589 {
 590         if (tdb->file->allrecord_lock.count == 0) {
 591                 tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
 592                            "tdb_allrecord_unlock: not locked!");
 593                 return;
 594         }
 595
 596         if (tdb->file->allrecord_lock.owner != tdb) {
 597                 tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
 598                            "tdb_allrecord_unlock: not locked by us!");
 599                 return;
 600         }
 601
 602         /* Upgradable locks are marked as write locks. */
 603         if (tdb->file->allrecord_lock.ltype != ltype
 604             && (!tdb->file->allrecord_lock.off || ltype != F_RDLCK)) {
 605                 tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 606                            "tdb_allrecord_unlock: have %s lock",
 607                            tdb->file->allrecord_lock.ltype == F_RDLCK
 608                            ? "read" : "write");
 609                 return;
 610         }
 611
 612         if (tdb->file->allrecord_lock.count > 1) {
 613                 tdb->file->allrecord_lock.count--;
 614                 return;
 615         }
 616
 617         tdb->file->allrecord_lock.count = 0;
 618         tdb->file->allrecord_lock.ltype = 0;
 619
 620         tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, 0);
 621 }
 622
 623 bool tdb_has_expansion_lock(struct tdb_context *tdb)
 624 {
 625         return find_nestlock(tdb, TDB_EXPANSION_LOCK, tdb) != NULL;
 626 }
 627
 628 bool tdb_has_hash_locks(struct tdb_context *tdb)
 629 {
 630         unsigned int i;
 631
 632         for (i=0; i<tdb->file->num_lockrecs; i++) {
 633                 if (tdb->file->lockrecs[i].off >= TDB_HASH_LOCK_START
 634                     && tdb->file->lockrecs[i].off < (TDB_HASH_LOCK_START
 635                                                      + TDB_HASH_LOCK_RANGE))
 636                         return true;
 637         }
 638         return false;
 639 }
 640
 641 static bool tdb_has_free_lock(struct tdb_context *tdb)
 642 {
 643         unsigned int i;
 644
 645         if (tdb->flags & TDB_NOLOCK)
 646                 return false;
 647
 648         for (i=0; i<tdb->file->num_lockrecs; i++) {
 649                 if (tdb->file->lockrecs[i].off
 650                     > TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE)
 651                         return true;
 652         }
 653         return false;
 654 }
 655
 656 enum TDB_ERROR tdb_lock_hashes(struct tdb_context *tdb,
 657                                tdb_off_t hash_lock,
 658                                tdb_len_t hash_range,
 659                                int ltype, enum tdb_lock_flags waitflag)
 660 {
 661         /* FIXME: Do this properly, using hlock_range */
 662         unsigned lock = TDB_HASH_LOCK_START
 663                 + (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));
 664
 665         /* a allrecord lock allows us to avoid per chain locks */
 666         if (tdb->file->allrecord_lock.count) {
 667                 if (tdb->file->allrecord_lock.owner != tdb)
 668                         return owner_conflict(tdb, "tdb_lock_hashes");
 669                 if (ltype == tdb->file->allrecord_lock.ltype
 670                     || ltype == F_RDLCK) {
 671                         return TDB_SUCCESS;
 672                 }
 673
 674                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
 675                                   "tdb_lock_hashes:"
 676                                   " already have %s allrecordlock",
 677                                   tdb->file->allrecord_lock.ltype == F_RDLCK
 678                                   ? "read" : "write");
 679         }
 680
 681         if (tdb_has_free_lock(tdb)) {
 682                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 683                                   "tdb_lock_hashes: already have free lock");
 684         }
 685
 686         if (tdb_has_expansion_lock(tdb)) {
 687                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 688                                   "tdb_lock_hashes:"
 689                                   " already have expansion lock");
 690         }
 691
 692         return tdb_nest_lock(tdb, lock, ltype, waitflag);
 693 }
 694
 695 enum TDB_ERROR tdb_unlock_hashes(struct tdb_context *tdb,
 696                                  tdb_off_t hash_lock,
 697                                  tdb_len_t hash_range, int ltype)
 698 {
 699         unsigned lock = TDB_HASH_LOCK_START
 700                 + (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));
 701
 702         if (tdb->flags & TDB_NOLOCK)
 703                 return 0;
 704
 705         /* a allrecord lock allows us to avoid per chain locks */
 706         if (tdb->file->allrecord_lock.count) {
 707                 if (tdb->file->allrecord_lock.ltype == F_RDLCK
 708                     && ltype == F_WRLCK) {
 709                         return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 710                                           "tdb_unlock_hashes RO allrecord!");
 711                 }
 712                 return TDB_SUCCESS;
 713         }
 714
 715         return tdb_nest_unlock(tdb, lock, ltype);
 716 }
 717
 718 /* Hash locks use TDB_HASH_LOCK_START + the next 30 bits.
 719  * Then we begin; bucket offsets are sizeof(tdb_len_t) apart, so we divide.
 720  * The result is that on 32 bit systems we don't use lock values > 2^31 on
 721  * files that are less than 4GB.
 722  */
 723 static tdb_off_t free_lock_off(tdb_off_t b_off)
 724 {
 725         return TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE
 726                 + b_off / sizeof(tdb_off_t);
 727 }
 728
 729 enum TDB_ERROR tdb_lock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off,
 730                                     enum tdb_lock_flags waitflag)
 731 {
 732         assert(b_off >= sizeof(struct tdb_header));
 733
 734         if (tdb->flags & TDB_NOLOCK)
 735                 return 0;
 736
 737         /* a allrecord lock allows us to avoid per chain locks */
 738         if (tdb->file->allrecord_lock.count) {
 739                 if (tdb->file->allrecord_lock.ltype == F_WRLCK)
 740                         return 0;
 741                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 742                                   "tdb_lock_free_bucket with"
 743                                   " read-only allrecordlock!");
 744         }
 745
 746 #if 0 /* FIXME */
 747         if (tdb_has_expansion_lock(tdb)) {
 748                 return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 749                                   "tdb_lock_free_bucket:"
 750                                   " already have expansion lock");
 751         }
 752 #endif
 753
 754         return tdb_nest_lock(tdb, free_lock_off(b_off), F_WRLCK, waitflag);
 755 }
 756
 757 void tdb_unlock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off)
 758 {
 759         if (tdb->file->allrecord_lock.count)
 760                 return;
 761
 762         tdb_nest_unlock(tdb, free_lock_off(b_off), F_WRLCK);
 763 }
 764
 765 void tdb_unlock_all(struct tdb_context *tdb)
 766 {
 767         unsigned int i;
 768
 769         while (tdb->file->allrecord_lock.count
 770                && tdb->file->allrecord_lock.owner == tdb) {
 771                 tdb_allrecord_unlock(tdb, tdb->file->allrecord_lock.ltype);
 772         }
 773
 774         for (i=0; i<tdb->file->num_lockrecs; i++) {
 775                 if (tdb->file->lockrecs[i].owner == tdb) {
 776                         tdb_nest_unlock(tdb,
 777                                         tdb->file->lockrecs[i].off,
 778                                         tdb->file->lockrecs[i].ltype);
 779                         i--;
 780                 }
 781         }
 782 }