git.ozlabs.org Git - ccan/blob - ccan/tdb2/tdb1_tdb.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library
   5
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 3 of the License, or (at your option) any later version.
  18
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, see <http://www.gnu.org/licenses/>.
  26 */
  27
  28 #include "tdb1_private.h"
  29
/* Shared "empty" datum.  It is defined at file scope without an initializer,
   so it starts out zeroed; _tdb1_fetch() hands it back when a lookup fails. */
_PUBLIC_ TDB1_DATA tdb1_null;
  31
  32 /*
  33   non-blocking increment of the tdb sequence number if the tdb has been opened using
  34   the TDB1_SEQNUM flag
  35 */
  36 _PUBLIC_ void tdb1_increment_seqnum_nonblock(struct tdb1_context *tdb)
  37 {
  38         tdb1_off_t seqnum=0;
  39
  40         if (!(tdb->flags & TDB1_SEQNUM)) {
  41                 return;
  42         }
  43
  44         /* we ignore errors from this, as we have no sane way of
  45            dealing with them.
  46         */
  47         tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
  48         seqnum++;
  49         tdb1_ofs_write(tdb, TDB1_SEQNUM_OFS, &seqnum);
  50 }
  51
  52 /*
  53   increment the tdb sequence number if the tdb has been opened using
  54   the TDB1_SEQNUM flag
  55 */
  56 static void tdb1_increment_seqnum(struct tdb1_context *tdb)
  57 {
  58         if (!(tdb->flags & TDB1_SEQNUM)) {
  59                 return;
  60         }
  61
  62         if (tdb1_nest_lock(tdb, TDB1_SEQNUM_OFS, F_WRLCK,
  63                           TDB1_LOCK_WAIT|TDB1_LOCK_PROBE) != 0) {
  64                 return;
  65         }
  66
  67         tdb1_increment_seqnum_nonblock(tdb);
  68
  69         tdb1_nest_unlock(tdb, TDB1_SEQNUM_OFS, F_WRLCK, false);
  70 }
  71
  72 static int tdb1_key_compare(TDB1_DATA key, TDB1_DATA data, void *private_data)
  73 {
  74         return memcmp(data.dptr, key.dptr, data.dsize);
  75 }
  76
  77 /* Returns 0 on fail.  On success, return offset of record, and fills
  78    in rec */
  79 static tdb1_off_t tdb1_find(struct tdb1_context *tdb, TDB1_DATA key, uint32_t hash,
  80                         struct tdb1_record *r)
  81 {
  82         tdb1_off_t rec_ptr;
  83
  84         /* read in the hash top */
  85         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
  86                 return 0;
  87
  88         /* keep looking until we find the right record */
  89         while (rec_ptr) {
  90                 if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
  91                         return 0;
  92
  93                 if (!TDB1_DEAD(r) && hash==r->full_hash
  94                     && key.dsize==r->key_len
  95                     && tdb1_parse_data(tdb, key, rec_ptr + sizeof(*r),
  96                                       r->key_len, tdb1_key_compare,
  97                                       NULL) == 0) {
  98                         return rec_ptr;
  99                 }
 100                 /* detect tight infinite loop */
 101                 if (rec_ptr == r->next) {
 102                         tdb->ecode = TDB1_ERR_CORRUPT;
 103                         TDB1_LOG((tdb, TDB1_DEBUG_FATAL, "tdb1_find: loop detected.\n"));
 104                         return 0;
 105                 }
 106                 rec_ptr = r->next;
 107         }
 108         tdb->ecode = TDB1_ERR_NOEXIST;
 109         return 0;
 110 }
 111
 112 /* As tdb1_find, but if you succeed, keep the lock */
 113 tdb1_off_t tdb1_find_lock_hash(struct tdb1_context *tdb, TDB1_DATA key, uint32_t hash, int locktype,
 114                            struct tdb1_record *rec)
 115 {
 116         uint32_t rec_ptr;
 117
 118         if (tdb1_lock(tdb, TDB1_BUCKET(hash), locktype) == -1)
 119                 return 0;
 120         if (!(rec_ptr = tdb1_find(tdb, key, hash, rec)))
 121                 tdb1_unlock(tdb, TDB1_BUCKET(hash), locktype);
 122         return rec_ptr;
 123 }
 124
 125 static TDB1_DATA _tdb1_fetch(struct tdb1_context *tdb, TDB1_DATA key);
 126
 127 /* update an entry in place - this only works if the new data size
 128    is <= the old data size and the key exists.
 129    on failure return -1.
 130 */
 131 static int tdb1_update_hash(struct tdb1_context *tdb, TDB1_DATA key, uint32_t hash, TDB1_DATA dbuf)
 132 {
 133         struct tdb1_record rec;
 134         tdb1_off_t rec_ptr;
 135
 136         /* find entry */
 137         if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec)))
 138                 return -1;
 139
 140         /* it could be an exact duplicate of what is there - this is
 141          * surprisingly common (eg. with a ldb re-index). */
 142         if (rec.key_len == key.dsize &&
 143             rec.data_len == dbuf.dsize &&
 144             rec.full_hash == hash) {
 145                 TDB1_DATA data = _tdb1_fetch(tdb, key);
 146                 if (data.dsize == dbuf.dsize &&
 147                     memcmp(data.dptr, dbuf.dptr, data.dsize) == 0) {
 148                         if (data.dptr) {
 149                                 free(data.dptr);
 150                         }
 151                         return 0;
 152                 }
 153                 if (data.dptr) {
 154                         free(data.dptr);
 155                 }
 156         }
 157
 158         /* must be long enough key, data and tailer */
 159         if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb1_off_t)) {
 160                 tdb->ecode = TDB1_SUCCESS; /* Not really an error */
 161                 return -1;
 162         }
 163
 164         if (tdb->methods->tdb1_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 165                       dbuf.dptr, dbuf.dsize) == -1)
 166                 return -1;
 167
 168         if (dbuf.dsize != rec.data_len) {
 169                 /* update size */
 170                 rec.data_len = dbuf.dsize;
 171                 return tdb1_rec_write(tdb, rec_ptr, &rec);
 172         }
 173
 174         return 0;
 175 }
 176
 177 /* find an entry in the database given a key */
 178 /* If an entry doesn't exist tdb1_err will be set to
 179  * TDB1_ERR_NOEXIST. If a key has no data attached
 180  * then the TDB1_DATA will have zero length but
 181  * a non-zero pointer
 182  */
 183 static TDB1_DATA _tdb1_fetch(struct tdb1_context *tdb, TDB1_DATA key)
 184 {
 185         tdb1_off_t rec_ptr;
 186         struct tdb1_record rec;
 187         TDB1_DATA ret;
 188         uint32_t hash;
 189
 190         /* find which hash bucket it is in */
 191         hash = tdb->hash_fn(&key);
 192         if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
 193                 return tdb1_null;
 194
 195         ret.dptr = tdb1_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 196                                   rec.data_len);
 197         ret.dsize = rec.data_len;
 198         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 199         return ret;
 200 }
 201
 202 _PUBLIC_ TDB1_DATA tdb1_fetch(struct tdb1_context *tdb, TDB1_DATA key)
 203 {
 204         TDB1_DATA ret = _tdb1_fetch(tdb, key);
 205
 206         return ret;
 207 }
 208
 209 /*
 210  * Find an entry in the database and hand the record's data to a parsing
 211  * function. The parsing function is executed under the chain read lock, so it
 212  * should be fast and should not block on other syscalls.
 213  *
 214  * DON'T CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
 215  *
 216  * For mmapped tdb's that do not have a transaction open it points the parsing
 217  * function directly at the mmap area, it avoids the malloc/memcpy in this
 218  * case. If a transaction is open or no mmap is available, it has to do
 219  * malloc/read/parse/free.
 220  *
 221  * This is interesting for all readers of potentially large data structures in
 222  * the tdb records, ldb indexes being one example.
 223  *
 224  * Return -1 if the record was not found.
 225  */
 226
 227 _PUBLIC_ int tdb1_parse_record(struct tdb1_context *tdb, TDB1_DATA key,
 228                      int (*parser)(TDB1_DATA key, TDB1_DATA data,
 229                                    void *private_data),
 230                      void *private_data)
 231 {
 232         tdb1_off_t rec_ptr;
 233         struct tdb1_record rec;
 234         int ret;
 235         uint32_t hash;
 236
 237         /* find which hash bucket it is in */
 238         hash = tdb->hash_fn(&key);
 239
 240         if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 241                 /* record not found */
 242                 tdb->ecode = TDB1_ERR_NOEXIST;
 243                 return -1;
 244         }
 245
 246         ret = tdb1_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
 247                              rec.data_len, parser, private_data);
 248
 249         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 250
 251         return ret;
 252 }
 253
 254 /* check if an entry in the database exists
 255
 256    note that 1 is returned if the key is found and 0 is returned if not found
 257    this doesn't match the conventions in the rest of this module, but is
 258    compatible with gdbm
 259 */
 260 static int tdb1_exists_hash(struct tdb1_context *tdb, TDB1_DATA key, uint32_t hash)
 261 {
 262         struct tdb1_record rec;
 263
 264         if (tdb1_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
 265                 return 0;
 266         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 267         return 1;
 268 }
 269
 270 _PUBLIC_ int tdb1_exists(struct tdb1_context *tdb, TDB1_DATA key)
 271 {
 272         uint32_t hash = tdb->hash_fn(&key);
 273         int ret;
 274
 275         ret = tdb1_exists_hash(tdb, key, hash);
 276         return ret;
 277 }
 278
 279 /* actually delete an entry in the database given the offset */
 280 int tdb1_do_delete(struct tdb1_context *tdb, tdb1_off_t rec_ptr, struct tdb1_record *rec)
 281 {
 282         tdb1_off_t last_ptr, i;
 283         struct tdb1_record lastrec;
 284
 285         if (tdb->read_only || tdb->traverse_read) return -1;
 286
 287         if (((tdb->traverse_write != 0) && (!TDB1_DEAD(rec))) ||
 288             tdb1_write_lock_record(tdb, rec_ptr) == -1) {
 289                 /* Someone traversing here: mark it as dead */
 290                 rec->magic = TDB1_DEAD_MAGIC;
 291                 return tdb1_rec_write(tdb, rec_ptr, rec);
 292         }
 293         if (tdb1_write_unlock_record(tdb, rec_ptr) != 0)
 294                 return -1;
 295
 296         /* find previous record in hash chain */
 297         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(rec->full_hash), &i) == -1)
 298                 return -1;
 299         for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
 300                 if (tdb1_rec_read(tdb, i, &lastrec) == -1)
 301                         return -1;
 302
 303         /* unlink it: next ptr is at start of record. */
 304         if (last_ptr == 0)
 305                 last_ptr = TDB1_HASH_TOP(rec->full_hash);
 306         if (tdb1_ofs_write(tdb, last_ptr, &rec->next) == -1)
 307                 return -1;
 308
 309         /* recover the space */
 310         if (tdb1_free(tdb, rec_ptr, rec) == -1)
 311                 return -1;
 312         return 0;
 313 }
 314
 315 static int tdb1_count_dead(struct tdb1_context *tdb, uint32_t hash)
 316 {
 317         int res = 0;
 318         tdb1_off_t rec_ptr;
 319         struct tdb1_record rec;
 320
 321         /* read in the hash top */
 322         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 323                 return 0;
 324
 325         while (rec_ptr) {
 326                 if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1)
 327                         return 0;
 328
 329                 if (rec.magic == TDB1_DEAD_MAGIC) {
 330                         res += 1;
 331                 }
 332                 rec_ptr = rec.next;
 333         }
 334         return res;
 335 }
 336
 337 /*
 338  * Purge all DEAD records from a hash chain
 339  */
 340 static int tdb1_purge_dead(struct tdb1_context *tdb, uint32_t hash)
 341 {
 342         int res = -1;
 343         struct tdb1_record rec;
 344         tdb1_off_t rec_ptr;
 345
 346         if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
 347                 return -1;
 348         }
 349
 350         /* read in the hash top */
 351         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 352                 goto fail;
 353
 354         while (rec_ptr) {
 355                 tdb1_off_t next;
 356
 357                 if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1) {
 358                         goto fail;
 359                 }
 360
 361                 next = rec.next;
 362
 363                 if (rec.magic == TDB1_DEAD_MAGIC
 364                     && tdb1_do_delete(tdb, rec_ptr, &rec) == -1) {
 365                         goto fail;
 366                 }
 367                 rec_ptr = next;
 368         }
 369         res = 0;
 370  fail:
 371         tdb1_unlock(tdb, -1, F_WRLCK);
 372         return res;
 373 }
 374
 375 /* delete an entry in the database given a key */
 376 static int tdb1_delete_hash(struct tdb1_context *tdb, TDB1_DATA key, uint32_t hash)
 377 {
 378         tdb1_off_t rec_ptr;
 379         struct tdb1_record rec;
 380         int ret;
 381
 382         if (tdb->max_dead_records != 0) {
 383
 384                 /*
 385                  * Allow for some dead records per hash chain, mainly for
 386                  * tdb's with a very high create/delete rate like locking.tdb.
 387                  */
 388
 389                 if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 390                         return -1;
 391
 392                 if (tdb1_count_dead(tdb, hash) >= tdb->max_dead_records) {
 393                         /*
 394                          * Don't let the per-chain freelist grow too large,
 395                          * delete all existing dead records
 396                          */
 397                         tdb1_purge_dead(tdb, hash);
 398                 }
 399
 400                 if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec))) {
 401                         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 402                         return -1;
 403                 }
 404
 405                 /*
 406                  * Just mark the record as dead.
 407                  */
 408                 rec.magic = TDB1_DEAD_MAGIC;
 409                 ret = tdb1_rec_write(tdb, rec_ptr, &rec);
 410         }
 411         else {
 412                 if (!(rec_ptr = tdb1_find_lock_hash(tdb, key, hash, F_WRLCK,
 413                                                    &rec)))
 414                         return -1;
 415
 416                 ret = tdb1_do_delete(tdb, rec_ptr, &rec);
 417         }
 418
 419         if (ret == 0) {
 420                 tdb1_increment_seqnum(tdb);
 421         }
 422
 423         if (tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_WRLCK) != 0)
 424                 TDB1_LOG((tdb, TDB1_DEBUG_WARNING, "tdb1_delete: WARNING tdb1_unlock failed!\n"));
 425         return ret;
 426 }
 427
 428 _PUBLIC_ int tdb1_delete(struct tdb1_context *tdb, TDB1_DATA key)
 429 {
 430         uint32_t hash = tdb->hash_fn(&key);
 431         int ret;
 432
 433         ret = tdb1_delete_hash(tdb, key, hash);
 434         return ret;
 435 }
 436
 437 /*
 438  * See if we have a dead record around with enough space
 439  */
 440 static tdb1_off_t tdb1_find_dead(struct tdb1_context *tdb, uint32_t hash,
 441                                struct tdb1_record *r, tdb1_len_t length)
 442 {
 443         tdb1_off_t rec_ptr;
 444
 445         /* read in the hash top */
 446         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 447                 return 0;
 448
 449         /* keep looking until we find the right record */
 450         while (rec_ptr) {
 451                 if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
 452                         return 0;
 453
 454                 if (TDB1_DEAD(r) && r->rec_len >= length) {
 455                         /*
 456                          * First fit for simple coding, TODO: change to best
 457                          * fit
 458                          */
 459                         return rec_ptr;
 460                 }
 461                 rec_ptr = r->next;
 462         }
 463         return 0;
 464 }
 465
 466 static int _tdb1_store(struct tdb1_context *tdb, TDB1_DATA key,
 467                        TDB1_DATA dbuf, int flag, uint32_t hash)
 468 {
 469         struct tdb1_record rec;
 470         tdb1_off_t rec_ptr;
 471         char *p = NULL;
 472         int ret = -1;
 473
 474         /* check for it existing, on insert. */
 475         if (flag == TDB1_INSERT) {
 476                 if (tdb1_exists_hash(tdb, key, hash)) {
 477                         tdb->ecode = TDB1_ERR_EXISTS;
 478                         goto fail;
 479                 }
 480         } else {
 481                 /* first try in-place update, on modify or replace. */
 482                 if (tdb1_update_hash(tdb, key, hash, dbuf) == 0) {
 483                         goto done;
 484                 }
 485                 if (tdb->ecode == TDB1_ERR_NOEXIST &&
 486                     flag == TDB1_MODIFY) {
 487                         /* if the record doesn't exist and we are in TDB1_MODIFY mode then
 488                          we should fail the store */
 489                         goto fail;
 490                 }
 491         }
 492         /* reset the error code potentially set by the tdb1_update() */
 493         tdb->ecode = TDB1_SUCCESS;
 494
 495         /* delete any existing record - if it doesn't exist we don't
 496            care.  Doing this first reduces fragmentation, and avoids
 497            coalescing with `allocated' block before it's updated. */
 498         if (flag != TDB1_INSERT)
 499                 tdb1_delete_hash(tdb, key, hash);
 500
 501         /* Copy key+value *before* allocating free space in case malloc
 502            fails and we are left with a dead spot in the tdb. */
 503
 504         if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
 505                 tdb->ecode = TDB1_ERR_OOM;
 506                 goto fail;
 507         }
 508
 509         memcpy(p, key.dptr, key.dsize);
 510         if (dbuf.dsize)
 511                 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
 512
 513         if (tdb->max_dead_records != 0) {
 514                 /*
 515                  * Allow for some dead records per hash chain, look if we can
 516                  * find one that can hold the new record. We need enough space
 517                  * for key, data and tailer. If we find one, we don't have to
 518                  * consult the central freelist.
 519                  */
 520                 rec_ptr = tdb1_find_dead(
 521                         tdb, hash, &rec,
 522                         key.dsize + dbuf.dsize + sizeof(tdb1_off_t));
 523
 524                 if (rec_ptr != 0) {
 525                         rec.key_len = key.dsize;
 526                         rec.data_len = dbuf.dsize;
 527                         rec.full_hash = hash;
 528                         rec.magic = TDB1_MAGIC;
 529                         if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
 530                             || tdb->methods->tdb1_write(
 531                                     tdb, rec_ptr + sizeof(rec),
 532                                     p, key.dsize + dbuf.dsize) == -1) {
 533                                 goto fail;
 534                         }
 535                         goto done;
 536                 }
 537         }
 538
 539         /*
 540          * We have to allocate some space from the freelist, so this means we
 541          * have to lock it. Use the chance to purge all the DEAD records from
 542          * the hash chain under the freelist lock.
 543          */
 544
 545         if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
 546                 goto fail;
 547         }
 548
 549         if ((tdb->max_dead_records != 0)
 550             && (tdb1_purge_dead(tdb, hash) == -1)) {
 551                 tdb1_unlock(tdb, -1, F_WRLCK);
 552                 goto fail;
 553         }
 554
 555         /* we have to allocate some space */
 556         rec_ptr = tdb1_allocate(tdb, key.dsize + dbuf.dsize, &rec);
 557
 558         tdb1_unlock(tdb, -1, F_WRLCK);
 559
 560         if (rec_ptr == 0) {
 561                 goto fail;
 562         }
 563
 564         /* Read hash top into next ptr */
 565         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec.next) == -1)
 566                 goto fail;
 567
 568         rec.key_len = key.dsize;
 569         rec.data_len = dbuf.dsize;
 570         rec.full_hash = hash;
 571         rec.magic = TDB1_MAGIC;
 572
 573         /* write out and point the top of the hash chain at it */
 574         if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
 575             || tdb->methods->tdb1_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
 576             || tdb1_ofs_write(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1) {
 577                 /* Need to tdb1_unallocate() here */
 578                 goto fail;
 579         }
 580
 581  done:
 582         ret = 0;
 583  fail:
 584         if (ret == 0) {
 585                 tdb1_increment_seqnum(tdb);
 586         }
 587
 588         SAFE_FREE(p);
 589         return ret;
 590 }
 591
 592 /* store an element in the database, replacing any existing element
 593    with the same key
 594
 595    return 0 on success, -1 on failure
 596 */
 597 _PUBLIC_ int tdb1_store(struct tdb1_context *tdb, TDB1_DATA key, TDB1_DATA dbuf, int flag)
 598 {
 599         uint32_t hash;
 600         int ret;
 601
 602         if (tdb->read_only || tdb->traverse_read) {
 603                 tdb->ecode = TDB1_ERR_RDONLY;
 604                 return -1;
 605         }
 606
 607         /* find which hash bucket it is in */
 608         hash = tdb->hash_fn(&key);
 609         if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 610                 return -1;
 611
 612         ret = _tdb1_store(tdb, key, dbuf, flag, hash);
 613         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 614         return ret;
 615 }
 616
 617 /* Append to an entry. Create if not exist. */
 618 _PUBLIC_ int tdb1_append(struct tdb1_context *tdb, TDB1_DATA key, TDB1_DATA new_dbuf)
 619 {
 620         uint32_t hash;
 621         TDB1_DATA dbuf;
 622         int ret = -1;
 623
 624         /* find which hash bucket it is in */
 625         hash = tdb->hash_fn(&key);
 626         if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 627                 return -1;
 628
 629         dbuf = _tdb1_fetch(tdb, key);
 630
 631         if (dbuf.dptr == NULL) {
 632                 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
 633         } else {
 634                 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
 635                 unsigned char *new_dptr;
 636
 637                 /* realloc '0' is special: don't do that. */
 638                 if (new_len == 0)
 639                         new_len = 1;
 640                 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
 641                 if (new_dptr == NULL) {
 642                         free(dbuf.dptr);
 643                 }
 644                 dbuf.dptr = new_dptr;
 645         }
 646
 647         if (dbuf.dptr == NULL) {
 648                 tdb->ecode = TDB1_ERR_OOM;
 649                 goto failed;
 650         }
 651
 652         memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
 653         dbuf.dsize += new_dbuf.dsize;
 654
 655         ret = _tdb1_store(tdb, key, dbuf, 0, hash);
 656
 657 failed:
 658         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 659         SAFE_FREE(dbuf.dptr);
 660         return ret;
 661 }
 662
 663
 664 /*
 665   return the name of the current tdb file
 666   useful for external logging functions
 667 */
 668 _PUBLIC_ const char *tdb1_name(struct tdb1_context *tdb)
 669 {
 670         return tdb->name;
 671 }
 672
 673 /*
 674   return the underlying file descriptor being used by tdb, or -1
 675   useful for external routines that want to check the device/inode
 676   of the fd
 677 */
 678 _PUBLIC_ int tdb1_fd(struct tdb1_context *tdb)
 679 {
 680         return tdb->fd;
 681 }
 682
 683 /*
 684   return the current logging function
 685   useful for external tdb routines that wish to log tdb errors
 686 */
 687 _PUBLIC_ tdb1_log_func tdb1_log_fn(struct tdb1_context *tdb)
 688 {
 689         return tdb->log.log_fn;
 690 }
 691
 692
 693 /*
 694   get the tdb sequence number. Only makes sense if the writers opened
 695   with TDB1_SEQNUM set. Note that this sequence number will wrap quite
 696   quickly, so it should only be used for a 'has something changed'
 697   test, not for code that relies on the count of the number of changes
 698   made. If you want a counter then use a tdb record.
 699
 700   The aim of this sequence number is to allow for a very lightweight
 701   test of a possible tdb change.
 702 */
 703 _PUBLIC_ int tdb1_get_seqnum(struct tdb1_context *tdb)
 704 {
 705         tdb1_off_t seqnum=0;
 706
 707         tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
 708         return seqnum;
 709 }
 710
 711 _PUBLIC_ int tdb1_hash_size(struct tdb1_context *tdb)
 712 {
 713         return tdb->header.hash_size;
 714 }
 715
 716 _PUBLIC_ size_t tdb1_map_size(struct tdb1_context *tdb)
 717 {
 718         return tdb->map_size;
 719 }
 720
 721 _PUBLIC_ int tdb1_get_flags(struct tdb1_context *tdb)
 722 {
 723         return tdb->flags;
 724 }
 725
 726 _PUBLIC_ void tdb1_add_flags(struct tdb1_context *tdb, unsigned flags)
 727 {
 728         if ((flags & TDB1_ALLOW_NESTING) &&
 729             (flags & TDB1_DISALLOW_NESTING)) {
 730                 tdb->ecode = TDB1_ERR_NESTING;
 731                 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, "tdb1_add_flags: "
 732                         "allow_nesting and disallow_nesting are not allowed together!"));
 733                 return;
 734         }
 735
 736         if (flags & TDB1_ALLOW_NESTING) {
 737                 tdb->flags &= ~TDB1_DISALLOW_NESTING;
 738         }
 739         if (flags & TDB1_DISALLOW_NESTING) {
 740                 tdb->flags &= ~TDB1_ALLOW_NESTING;
 741         }
 742
 743         tdb->flags |= flags;
 744 }
 745
 746 _PUBLIC_ void tdb1_remove_flags(struct tdb1_context *tdb, unsigned flags)
 747 {
 748         if ((flags & TDB1_ALLOW_NESTING) &&
 749             (flags & TDB1_DISALLOW_NESTING)) {
 750                 tdb->ecode = TDB1_ERR_NESTING;
 751                 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, "tdb1_remove_flags: "
 752                         "allow_nesting and disallow_nesting are not allowed together!"));
 753                 return;
 754         }
 755
 756         if (flags & TDB1_ALLOW_NESTING) {
 757                 tdb->flags |= TDB1_DISALLOW_NESTING;
 758         }
 759         if (flags & TDB1_DISALLOW_NESTING) {
 760                 tdb->flags |= TDB1_ALLOW_NESTING;
 761         }
 762
 763         tdb->flags &= ~flags;
 764 }
 765
 766
 767 /*
 768   enable sequence number handling on an open tdb
 769 */
 770 _PUBLIC_ void tdb1_enable_seqnum(struct tdb1_context *tdb)
 771 {
 772         tdb->flags |= TDB1_SEQNUM;
 773 }
 774
 775
 776 /*
 777   add a region of the file to the freelist. Length is the size of the region in bytes,
 778   which includes the free list header that needs to be added
 779  */
 780 static int tdb1_free_region(struct tdb1_context *tdb, tdb1_off_t offset, ssize_t length)
 781 {
 782         struct tdb1_record rec;
 783         if (length <= sizeof(rec)) {
 784                 /* the region is not worth adding */
 785                 return 0;
 786         }
 787         if (length + offset > tdb->map_size) {
 788                 TDB1_LOG((tdb, TDB1_DEBUG_FATAL,"tdb1_free_region: adding region beyond end of file\n"));
 789                 return -1;
 790         }
 791         memset(&rec,'\0',sizeof(rec));
 792         rec.rec_len = length - sizeof(rec);
 793         if (tdb1_free(tdb, offset, &rec) == -1) {
 794                 TDB1_LOG((tdb, TDB1_DEBUG_FATAL,"tdb1_free_region: failed to add free record\n"));
 795                 return -1;
 796         }
 797         return 0;
 798 }
 799
 800 /*
 801   wipe the entire database, deleting all records. This can be done
 802   very fast by using a allrecord lock. The entire data portion of the
 803   file becomes a single entry in the freelist.
 804
 805   This code carefully steps around the recovery area, leaving it alone
 806  */
 807 _PUBLIC_ int tdb1_wipe_all(struct tdb1_context *tdb)
 808 {
 809         int i;
 810         tdb1_off_t offset = 0;
 811         ssize_t data_len;
 812         tdb1_off_t recovery_head;
 813         tdb1_len_t recovery_size = 0;
 814
 815         if (tdb1_lockall(tdb) != 0) {
 816                 return -1;
 817         }
 818
 819
 820         /* see if the tdb has a recovery area, and remember its size
 821            if so. We don't want to lose this as otherwise each
 822            tdb1_wipe_all() in a transaction will increase the size of
 823            the tdb by the size of the recovery area */
 824         if (tdb1_ofs_read(tdb, TDB1_RECOVERY_HEAD, &recovery_head) == -1) {
 825                 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, "tdb1_wipe_all: failed to read recovery head\n"));
 826                 goto failed;
 827         }
 828
 829         if (recovery_head != 0) {
 830                 struct tdb1_record rec;
 831                 if (tdb->methods->tdb1_read(tdb, recovery_head, &rec, sizeof(rec), TDB1_DOCONV()) == -1) {
 832                         TDB1_LOG((tdb, TDB1_DEBUG_FATAL, "tdb1_wipe_all: failed to read recovery record\n"));
 833                         return -1;
 834                 }
 835                 recovery_size = rec.rec_len + sizeof(rec);
 836         }
 837
 838         /* wipe the hashes */
 839         for (i=0;i<tdb->header.hash_size;i++) {
 840                 if (tdb1_ofs_write(tdb, TDB1_HASH_TOP(i), &offset) == -1) {
 841                         TDB1_LOG((tdb, TDB1_DEBUG_FATAL,"tdb1_wipe_all: failed to write hash %d\n", i));
 842                         goto failed;
 843                 }
 844         }
 845
 846         /* wipe the freelist */
 847         if (tdb1_ofs_write(tdb, TDB1_FREELIST_TOP, &offset) == -1) {
 848                 TDB1_LOG((tdb, TDB1_DEBUG_FATAL,"tdb1_wipe_all: failed to write freelist\n"));
 849                 goto failed;
 850         }
 851
 852         /* add all the rest of the file to the freelist, possibly leaving a gap
 853            for the recovery area */
 854         if (recovery_size == 0) {
 855                 /* the simple case - the whole file can be used as a freelist */
 856                 data_len = (tdb->map_size - TDB1_DATA_START(tdb->header.hash_size));
 857                 if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->header.hash_size), data_len) != 0) {
 858                         goto failed;
 859                 }
 860         } else {
 861                 /* we need to add two freelist entries - one on either
 862                    side of the recovery area
 863
 864                    Note that we cannot shift the recovery area during
 865                    this operation. Only the transaction.c code may
 866                    move the recovery area or we risk subtle data
 867                    corruption
 868                 */
 869                 data_len = (recovery_head - TDB1_DATA_START(tdb->header.hash_size));
 870                 if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->header.hash_size), data_len) != 0) {
 871                         goto failed;
 872                 }
 873                 /* and the 2nd free list entry after the recovery area - if any */
 874                 data_len = tdb->map_size - (recovery_head+recovery_size);
 875                 if (tdb1_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
 876                         goto failed;
 877                 }
 878         }
 879
 880         if (tdb1_unlockall(tdb) != 0) {
 881                 TDB1_LOG((tdb, TDB1_DEBUG_FATAL,"tdb1_wipe_all: failed to unlock\n"));
 882                 goto failed;
 883         }
 884
 885         return 0;
 886
 887 failed:
 888         tdb1_unlockall(tdb);
 889         return -1;
 890 }
 891
 892 struct traverse_state {
 893         bool error;
 894         struct tdb1_context *dest_db;
 895 };
 896
 897 /*
 898   traverse function for repacking
 899  */
 900 static int repack_traverse(struct tdb1_context *tdb, TDB1_DATA key, TDB1_DATA data, void *private_data)
 901 {
 902         struct traverse_state *state = (struct traverse_state *)private_data;
 903         if (tdb1_store(state->dest_db, key, data, TDB1_INSERT) != 0) {
 904                 state->error = true;
 905                 return -1;
 906         }
 907         return 0;
 908 }
 909
 910 /*
 911   repack a tdb
 912  */
 913 _PUBLIC_ int tdb1_repack(struct tdb1_context *tdb)
 914 {
 915         struct tdb1_context *tmp_db;
 916         struct traverse_state state;
 917
 918         if (tdb1_transaction_start(tdb) != 0) {
 919                 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, __location__ " Failed to start transaction\n"));
 920                 return -1;
 921         }
 922
 923         tmp_db = tdb1_open("tmpdb", tdb1_hash_size(tdb), TDB1_INTERNAL, O_RDWR|O_CREAT, 0);
 924         if (tmp_db == NULL) {
 925                 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, __location__ " Failed to create tmp_db\n"));
 926                 tdb1_transaction_cancel(tdb);
 927                 return -1;
 928         }
 929
 930         state.error = false;
 931         state.dest_db = tmp_db;
 932
 933         if (tdb1_traverse_read(tdb, repack_traverse, &state) == -1) {
 934                 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, __location__ " Failed to traverse copying out\n"));
 935                 tdb1_transaction_cancel(tdb);
 936                 tdb1_close(tmp_db);
 937                 return -1;
 938         }
 939
 940         if (state.error) {
 941                 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, __location__ " Error during traversal\n"));
 942                 tdb1_transaction_cancel(tdb);
 943                 tdb1_close(tmp_db);
 944                 return -1;
 945         }
 946
 947         if (tdb1_wipe_all(tdb) != 0) {
 948                 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, __location__ " Failed to wipe database\n"));
 949                 tdb1_transaction_cancel(tdb);
 950                 tdb1_close(tmp_db);
 951                 return -1;
 952         }
 953
 954         state.error = false;
 955         state.dest_db = tdb;
 956
 957         if (tdb1_traverse_read(tmp_db, repack_traverse, &state) == -1) {
 958                 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, __location__ " Failed to traverse copying back\n"));
 959                 tdb1_transaction_cancel(tdb);
 960                 tdb1_close(tmp_db);
 961                 return -1;
 962         }
 963
 964         if (state.error) {
 965                 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, __location__ " Error during second traversal\n"));
 966                 tdb1_transaction_cancel(tdb);
 967                 tdb1_close(tmp_db);
 968                 return -1;
 969         }
 970
 971         tdb1_close(tmp_db);
 972
 973         if (tdb1_transaction_commit(tdb) != 0) {
 974                 TDB1_LOG((tdb, TDB1_DEBUG_FATAL, __location__ " Failed to commit\n"));
 975                 return -1;
 976         }
 977
 978         return 0;
 979 }
 980
 981 /* Even on files, we can get partial writes due to signals. */
 982 bool tdb1_write_all(int fd, const void *buf, size_t count)
 983 {
 984         while (count) {
 985                 ssize_t ret;
 986                 ret = write(fd, buf, count);
 987                 if (ret < 0)
 988                         return false;
 989                 buf = (const char *)buf + ret;
 990                 count -= ret;
 991         }
 992         return true;
 993 }