git.ozlabs.org Git - ccan/blob - ccan/tdb2/tdb1_lock.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library
   5
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 3 of the License, or (at your option) any later version.
  18
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, see <http://www.gnu.org/licenses/>.
  26 */
  27
  28 #include "tdb1_private.h"
  29
  30 /* list -1 is the alloc list, otherwise a hash chain. */
  31 static tdb1_off_t lock_offset(int list)
  32 {
  33         return TDB1_FREELIST_TOP + 4*list;
  34 }
  35
  36 /* a byte range locking function - return 0 on success
  37    this functions locks/unlocks 1 byte at the specified offset.
  38
  39    On error, errno is also set so that errors are passed back properly
  40    through tdb1_open().
  41
  42    note that a len of zero means lock to end of file
  43 */
  44 int tdb1_brlock(struct tdb_context *tdb,
  45                int rw_type, tdb1_off_t offset, size_t len,
  46                enum tdb_lock_flags flags)
  47 {
  48         enum TDB_ERROR ecode = tdb_brlock(tdb, rw_type, offset, len, flags
  49                                           | TDB_LOCK_NOCHECK);
  50         if (ecode == TDB_SUCCESS)
  51                 return 0;
  52         tdb->last_error = ecode;
  53         return -1;
  54 }
  55
  56 int tdb1_brunlock(struct tdb_context *tdb,
  57                  int rw_type, tdb1_off_t offset, size_t len)
  58 {
  59         enum TDB_ERROR ecode = tdb_brunlock(tdb, rw_type, offset, len);
  60         if (ecode == TDB_SUCCESS)
  61                 return 0;
  62         tdb->last_error = ecode;
  63         return -1;
  64 }
  65
  66 int tdb1_allrecord_upgrade(struct tdb_context *tdb)
  67 {
  68         enum TDB_ERROR ecode = tdb_allrecord_upgrade(tdb, TDB1_FREELIST_TOP);
  69         if (ecode == TDB_SUCCESS)
  70                 return 0;
  71         tdb->last_error = ecode;
  72         return -1;
  73 }
  74
  75 static struct tdb_lock *tdb1_find_nestlock(struct tdb_context *tdb,
  76                                            tdb1_off_t offset)
  77 {
  78         unsigned int i;
  79
  80         for (i=0; i<tdb->file->num_lockrecs; i++) {
  81                 if (tdb->file->lockrecs[i].off == offset) {
  82                         return &tdb->file->lockrecs[i];
  83                 }
  84         }
  85         return NULL;
  86 }
  87
  88 /* lock an offset in the database. */
  89 int tdb1_nest_lock(struct tdb_context *tdb, uint32_t offset, int ltype,
  90                   enum tdb_lock_flags flags)
  91 {
  92         enum TDB_ERROR ecode;
  93
  94         if (offset >= lock_offset(tdb->tdb1.header.hash_size)) {
  95                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  96                                         "tdb1_lock: invalid offset %u for"
  97                                         " ltype=%d",
  98                                         offset, ltype);
  99                 return -1;
 100         }
 101
 102         ecode = tdb_nest_lock(tdb, offset, ltype, flags | TDB_LOCK_NOCHECK);
 103         if (unlikely(ecode != TDB_SUCCESS)) {
 104                 tdb->last_error = ecode;
 105                 return -1;
 106         }
 107         return 0;
 108 }
 109
 110 static int tdb1_lock_and_recover(struct tdb_context *tdb)
 111 {
 112         int ret;
 113
 114         /* We need to match locking order in transaction commit. */
 115         if (tdb1_brlock(tdb, F_WRLCK, TDB1_FREELIST_TOP, 0,
 116                         TDB_LOCK_WAIT|TDB_LOCK_NOCHECK)) {
 117                 return -1;
 118         }
 119
 120         if (tdb1_brlock(tdb, F_WRLCK, TDB1_OPEN_LOCK, 1,
 121                         TDB_LOCK_WAIT|TDB_LOCK_NOCHECK)) {
 122                 tdb1_brunlock(tdb, F_WRLCK, TDB1_FREELIST_TOP, 0);
 123                 return -1;
 124         }
 125
 126         ret = tdb1_transaction_recover(tdb);
 127
 128         tdb1_brunlock(tdb, F_WRLCK, TDB1_OPEN_LOCK, 1);
 129         tdb1_brunlock(tdb, F_WRLCK, TDB1_FREELIST_TOP, 0);
 130
 131         return ret;
 132 }
 133
 134 static bool have_data_locks(const struct tdb_context *tdb)
 135 {
 136         unsigned int i;
 137
 138         for (i = 0; i < tdb->file->num_lockrecs; i++) {
 139                 if (tdb->file->lockrecs[i].off >= lock_offset(-1))
 140                         return true;
 141         }
 142         return false;
 143 }
 144
 145 static int tdb1_lock_list(struct tdb_context *tdb, int list, int ltype,
 146                          enum tdb_lock_flags waitflag)
 147 {
 148         int ret;
 149         bool check = false;
 150
 151         /* a allrecord lock allows us to avoid per chain locks */
 152         if (tdb->file->allrecord_lock.count) {
 153                 if (!check_lock_pid(tdb, "tdb1_lock_list", true)) {
 154                         tdb->last_error = TDB_ERR_LOCK;
 155                         return -1;
 156                 }
 157                 if (tdb->file->allrecord_lock.owner != tdb) {
 158                         tdb->last_error = owner_conflict(tdb, "tdb1_lock_list");
 159                         return -1;
 160                 }
 161                 if (ltype == tdb->file->allrecord_lock.ltype
 162                     || ltype == F_RDLCK) {
 163                         return 0;
 164                 }
 165                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK,
 166                                              TDB_LOG_USE_ERROR,
 167                                              "tdb1_lock_list:"
 168                                              " already have read lock");
 169                 return -1;
 170         }
 171
 172         /* Only check when we grab first data lock. */
 173         check = !have_data_locks(tdb);
 174         ret = tdb1_nest_lock(tdb, lock_offset(list), ltype, waitflag);
 175
 176         if (ret == 0 && check) {
 177                 tdb_bool_err berr = tdb1_needs_recovery(tdb);
 178
 179                 if (berr < 0) {
 180                         return -1;
 181                 }
 182                 if (berr == true) {
 183                         tdb1_nest_unlock(tdb, lock_offset(list), ltype);
 184
 185                         if (tdb1_lock_and_recover(tdb) == -1) {
 186                                 return -1;
 187                         }
 188                         return tdb1_lock_list(tdb, list, ltype, waitflag);
 189                 }
 190         }
 191         return ret;
 192 }
 193
 194 /* lock a list in the database. list -1 is the alloc list */
 195 int tdb1_lock(struct tdb_context *tdb, int list, int ltype)
 196 {
 197         int ret;
 198
 199         ret = tdb1_lock_list(tdb, list, ltype, TDB_LOCK_WAIT);
 200         /* Don't log for EAGAIN and EINTR: they could have overridden lock fns */
 201         if (ret && errno != EAGAIN && errno != EINTR) {
 202                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 203                            "tdb1_lock failed on list %d "
 204                            "ltype=%d (%s)",  list, ltype, strerror(errno));
 205         }
 206         return ret;
 207 }
 208
 209 int tdb1_nest_unlock(struct tdb_context *tdb, uint32_t offset, int ltype)
 210 {
 211         enum TDB_ERROR ecode;
 212
 213         /* Sanity checks */
 214         if (offset >= lock_offset(tdb->tdb1.header.hash_size)) {
 215                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
 216                                         "tdb1_unlock: offset %u invalid (%d)",
 217                                         offset, tdb->tdb1.header.hash_size);
 218                 return -1;
 219         }
 220
 221         ecode = tdb_nest_unlock(tdb, offset, ltype);
 222         if (unlikely(ecode != TDB_SUCCESS)) {
 223                 tdb->last_error = ecode;
 224                 return -1;
 225         }
 226         return 0;
 227 }
 228
 229 int tdb1_unlock(struct tdb_context *tdb, int list, int ltype)
 230 {
 231         /* a global lock allows us to avoid per chain locks */
 232         if (tdb->file->allrecord_lock.count &&
 233             (ltype == tdb->file->allrecord_lock.ltype || ltype == F_RDLCK)) {
 234                 if (tdb->file->allrecord_lock.owner != tdb) {
 235                         tdb->last_error = owner_conflict(tdb, "tdb1_unlock");
 236                         return -1;
 237                 }
 238                 return 0;
 239         }
 240
 241         if (tdb->file->allrecord_lock.count) {
 242                 tdb->last_error = TDB_ERR_LOCK;
 243                 return -1;
 244         }
 245
 246         return tdb1_nest_unlock(tdb, lock_offset(list), ltype);
 247 }
 248
 249 /*
 250   get the transaction lock
 251  */
 252 int tdb1_transaction_lock(struct tdb_context *tdb, int ltype,
 253                          enum tdb_lock_flags lockflags)
 254 {
 255         return tdb1_nest_lock(tdb, TDB1_TRANSACTION_LOCK, ltype, lockflags);
 256 }
 257
 258 /*
 259   release the transaction lock
 260  */
 261 int tdb1_transaction_unlock(struct tdb_context *tdb, int ltype)
 262 {
 263         return tdb1_nest_unlock(tdb, TDB1_TRANSACTION_LOCK, ltype);
 264 }
 265
 266 /* lock/unlock entire database.  It can only be upgradable if you have some
 267  * other way of guaranteeing exclusivity (ie. transaction write lock).
 268  * We do the locking gradually to avoid being starved by smaller locks. */
 269 int tdb1_allrecord_lock(struct tdb_context *tdb, int ltype,
 270                        enum tdb_lock_flags flags, bool upgradable)
 271 {
 272         enum TDB_ERROR ecode;
 273         tdb_bool_err berr;
 274
 275         /* tdb_lock_gradual() doesn't know about tdb->tdb1.traverse_read. */
 276         if (tdb->tdb1.traverse_read && !(tdb->flags & TDB_NOLOCK)) {
 277                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK,
 278                                              TDB_LOG_USE_ERROR,
 279                                              "tdb1_allrecord_lock during"
 280                                              " tdb1_read_traverse");
 281                 return -1;
 282         }
 283
 284         if (tdb->file->allrecord_lock.count
 285             && tdb->file->allrecord_lock.ltype == ltype) {
 286                 tdb->file->allrecord_lock.count++;
 287                 return 0;
 288         }
 289
 290         if (tdb1_have_extra_locks(tdb)) {
 291                 /* can't combine global and chain locks */
 292                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK,
 293                                              TDB_LOG_USE_ERROR,
 294                                              "tdb1_allrecord_lock holding"
 295                                              " other locks");
 296                 return -1;
 297         }
 298
 299         if (upgradable && ltype != F_RDLCK) {
 300                 /* tdb error: you can't upgrade a write lock! */
 301                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK,
 302                                              TDB_LOG_ERROR,
 303                                              "tdb1_allrecord_lock cannot"
 304                                              " have upgradable write lock");
 305                 return -1;
 306         }
 307
 308         /* We cover two kinds of locks:
 309          * 1) Normal chain locks.  Taken for almost all operations.
 310          * 3) Individual records locks.  Taken after normal or free
 311          *    chain locks.
 312          *
 313          * It is (1) which cause the starvation problem, so we're only
 314          * gradual for that. */
 315         ecode = tdb_lock_gradual(tdb, ltype, flags | TDB_LOCK_NOCHECK,
 316                                  TDB1_FREELIST_TOP, tdb->tdb1.header.hash_size * 4);
 317         if (ecode != TDB_SUCCESS) {
 318                 tdb->last_error = ecode;
 319                 return -1;
 320         }
 321
 322         /* Grab individual record locks. */
 323         if (tdb1_brlock(tdb, ltype, lock_offset(tdb->tdb1.header.hash_size), 0,
 324                        flags) == -1) {
 325                 tdb1_brunlock(tdb, ltype, TDB1_FREELIST_TOP,
 326                              tdb->tdb1.header.hash_size * 4);
 327                 return -1;
 328         }
 329
 330         tdb->file->allrecord_lock.owner = tdb;
 331         tdb->file->allrecord_lock.count = 1;
 332         tdb->file->locker = getpid();
 333         /* If it's upgradable, it's actually exclusive so we can treat
 334          * it as a write lock. */
 335         tdb->file->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
 336         tdb->file->allrecord_lock.off = upgradable;
 337
 338         berr = tdb1_needs_recovery(tdb);
 339         if (berr < 0) {
 340                 return -1;
 341         }
 342
 343         if (berr == true) {
 344                 tdb1_allrecord_unlock(tdb, ltype);
 345                 if (tdb1_lock_and_recover(tdb) == -1) {
 346                         return -1;
 347                 }
 348                 return tdb1_allrecord_lock(tdb, ltype, flags, upgradable);
 349         }
 350
 351         return 0;
 352 }
 353
 354
 355
 356 /* unlock entire db */
 357 int tdb1_allrecord_unlock(struct tdb_context *tdb, int ltype)
 358 {
 359         /* Don't try this during r/o traversal! */
 360         if (tdb->tdb1.traverse_read) {
 361                 tdb->last_error = TDB_ERR_LOCK;
 362                 return -1;
 363         }
 364
 365         if (tdb->file->allrecord_lock.count == 0) {
 366                 tdb->last_error = TDB_ERR_LOCK;
 367                 return -1;
 368         }
 369
 370         /* Upgradable locks are marked as write locks. */
 371         if (tdb->file->allrecord_lock.ltype != ltype
 372             && (!tdb->file->allrecord_lock.off || ltype != F_RDLCK)) {
 373                 tdb->last_error = TDB_ERR_LOCK;
 374                 return -1;
 375         }
 376
 377         if (tdb->file->allrecord_lock.count > 1) {
 378                 if (tdb->file->allrecord_lock.owner != tdb) {
 379                         tdb->last_error
 380                                 = owner_conflict(tdb, "tdb1_allrecord_unlock");
 381                         return -1;
 382                 }
 383                 tdb->file->allrecord_lock.count--;
 384                 return 0;
 385         }
 386
 387         tdb->file->allrecord_lock.count = 0;
 388         tdb->file->allrecord_lock.ltype = 0;
 389
 390         if (tdb1_brunlock(tdb, ltype, TDB1_FREELIST_TOP, 0)) {
 391                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 392                            "tdb1_unlockall failed (%s)", strerror(errno));
 393                 return -1;
 394         }
 395
 396         return 0;
 397 }
 398
 399 /* lock/unlock one hash chain. This is meant to be used to reduce
 400    contention - it cannot guarantee how many records will be locked */
 401 int tdb1_chainlock(struct tdb_context *tdb, TDB_DATA key)
 402 {
 403         int ret = tdb1_lock(tdb,
 404                             TDB1_BUCKET(tdb_hash(tdb, key.dptr, key.dsize)),
 405                             F_WRLCK);
 406         return ret;
 407 }
 408
 409 int tdb1_chainunlock(struct tdb_context *tdb, TDB_DATA key)
 410 {
 411         return tdb1_unlock(tdb, TDB1_BUCKET(tdb_hash(tdb, key.dptr, key.dsize)),
 412                            F_WRLCK);
 413 }
 414
 415 int tdb1_chainlock_read(struct tdb_context *tdb, TDB_DATA key)
 416 {
 417         int ret;
 418         ret = tdb1_lock(tdb, TDB1_BUCKET(tdb_hash(tdb, key.dptr, key.dsize)),
 419                         F_RDLCK);
 420         return ret;
 421 }
 422
 423 int tdb1_chainunlock_read(struct tdb_context *tdb, TDB_DATA key)
 424 {
 425         return tdb1_unlock(tdb, TDB1_BUCKET(tdb_hash(tdb, key.dptr, key.dsize)),
 426                            F_RDLCK);
 427 }
 428
 429 /* record lock stops delete underneath */
 430 int tdb1_lock_record(struct tdb_context *tdb, tdb1_off_t off)
 431 {
 432         if (tdb->file->allrecord_lock.count) {
 433                 if (!check_lock_pid(tdb, "tdb1_lock_record", true)) {
 434                         tdb->last_error = TDB_ERR_LOCK;
 435                         return -1;
 436                 }
 437                 if (tdb->file->allrecord_lock.owner != tdb) {
 438                         tdb->last_error = owner_conflict(tdb,
 439                                                          "tdb1_lock_record");
 440                         return -1;
 441                 }
 442                 return 0;
 443         }
 444         return off ? tdb1_brlock(tdb, F_RDLCK, off, 1, TDB_LOCK_WAIT) : 0;
 445 }
 446
 447 /*
 448   Write locks override our own fcntl readlocks, so check it here.
 449   Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not
 450   an error to fail to get the lock here.
 451 */
 452 int tdb1_write_lock_record(struct tdb_context *tdb, tdb1_off_t off)
 453 {
 454         struct tdb1_traverse_lock *i;
 455         for (i = &tdb->tdb1.travlocks; i; i = i->next)
 456                 if (i->off == off)
 457                         return -1;
 458         if (tdb->file->allrecord_lock.count) {
 459                 if (!check_lock_pid(tdb, "tdb1_write_lock_record", true)) {
 460                         tdb->last_error = TDB_ERR_LOCK;
 461                         return -1;
 462                 }
 463                 if (tdb->file->allrecord_lock.owner != tdb) {
 464                         tdb->last_error
 465                                 = owner_conflict(tdb, "tdb1_write_lock_record");
 466                         return -1;
 467                 }
 468                 if (tdb->file->allrecord_lock.ltype == F_WRLCK) {
 469                         return 0;
 470                 }
 471                 return -1;
 472         }
 473         return tdb1_brlock(tdb, F_WRLCK, off, 1, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE);
 474 }
 475
 476 int tdb1_write_unlock_record(struct tdb_context *tdb, tdb1_off_t off)
 477 {
 478         if (tdb->file->allrecord_lock.count) {
 479                 if (tdb->file->allrecord_lock.owner != tdb) {
 480                         tdb->last_error
 481                                 = owner_conflict(tdb,
 482                                                  "tdb1_write_unlock_record");
 483                         return -1;
 484                 }
 485                 return 0;
 486         }
 487         return tdb1_brunlock(tdb, F_WRLCK, off, 1);
 488 }
 489
 490 /* fcntl locks don't stack: avoid unlocking someone else's */
 491 int tdb1_unlock_record(struct tdb_context *tdb, tdb1_off_t off)
 492 {
 493         struct tdb1_traverse_lock *i;
 494         uint32_t count = 0;
 495
 496         if (tdb->file->allrecord_lock.count) {
 497                 if (tdb->file->allrecord_lock.owner != tdb) {
 498                         tdb->last_error = owner_conflict(tdb,
 499                                                          "tdb1_unlock_record");
 500                         return -1;
 501                 }
 502                 return 0;
 503         }
 504
 505         if (off == 0)
 506                 return 0;
 507         for (i = &tdb->tdb1.travlocks; i; i = i->next)
 508                 if (i->off == off)
 509                         count++;
 510         return (count == 1 ? tdb1_brunlock(tdb, F_RDLCK, off, 1) : 0);
 511 }
 512
 513 bool tdb1_have_extra_locks(struct tdb_context *tdb)
 514 {
 515         unsigned int extra = tdb->file->num_lockrecs;
 516
 517         /* A transaction holds the lock for all records. */
 518         if (!tdb->tdb1.transaction && tdb->file->allrecord_lock.count) {
 519                 return true;
 520         }
 521
 522         /* We always hold the active lock if CLEAR_IF_FIRST. */
 523         if (tdb1_find_nestlock(tdb, TDB1_ACTIVE_LOCK)) {
 524                 extra--;
 525         }
 526
 527         /* In a transaction, we expect to hold the transaction lock */
 528         if (tdb->tdb1.transaction
 529             && tdb1_find_nestlock(tdb, TDB1_TRANSACTION_LOCK)) {
 530                 extra--;
 531         }
 532
 533         return extra;
 534 }
 535
 536 /* The transaction code uses this to remove all locks. */
 537 void tdb1_release_transaction_locks(struct tdb_context *tdb)
 538 {
 539         unsigned int i, active = 0;
 540
 541         if (tdb->file->allrecord_lock.count != 0) {
 542                 tdb1_brunlock(tdb, tdb->file->allrecord_lock.ltype, TDB1_FREELIST_TOP, 0);
 543                 tdb->file->allrecord_lock.count = 0;
 544         }
 545
 546         for (i=0;i<tdb->file->num_lockrecs;i++) {
 547                 struct tdb_lock *lck = &tdb->file->lockrecs[i];
 548
 549                 /* Don't release the active lock!  Copy it to first entry. */
 550                 if (lck->off == TDB1_ACTIVE_LOCK) {
 551                         tdb->file->lockrecs[active++] = *lck;
 552                 } else {
 553                         tdb1_brunlock(tdb, lck->ltype, lck->off, 1);
 554                 }
 555         }
 556         tdb->file->num_lockrecs = active;
 557         if (tdb->file->num_lockrecs == 0) {
 558                 SAFE_FREE(tdb->file->lockrecs);
 559         }
 560 }