git.ozlabs.org Git - ccan/blob - ccan/tdb/tdb.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library
   5
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 3 of the License, or (at your option) any later version.
  18
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, see <http://www.gnu.org/licenses/>.
  26 */
  27
  28 #include "tdb_private.h"
  29
  30 TDB_DATA tdb_null;
  31
  32 /*
  33   non-blocking increment of the tdb sequence number if the tdb has been opened using
  34   the TDB_SEQNUM flag
  35 */
  36 void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
  37 {
  38         tdb_off_t seqnum=0;
  39
  40         if (!(tdb->flags & TDB_SEQNUM)) {
  41                 return;
  42         }
  43
  44         tdb_trace(tdb, "tdb_increment_seqnum_nonblock");
  45
  46         /* we ignore errors from this, as we have no sane way of
  47            dealing with them.
  48         */
  49         tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
  50         seqnum++;
  51         tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
  52 }
  53
  54 /*
  55   increment the tdb sequence number if the tdb has been opened using
  56   the TDB_SEQNUM flag
  57 */
  58 static void tdb_increment_seqnum(struct tdb_context *tdb)
  59 {
  60         if (!(tdb->flags & TDB_SEQNUM)) {
  61                 return;
  62         }
  63
  64         if (tdb_brlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, F_SETLKW, 1, 1) != 0) {
  65                 return;
  66         }
  67
  68         tdb_increment_seqnum_nonblock(tdb);
  69
  70         tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1, 1);
  71 }
  72
  73 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
  74 {
  75         return memcmp(data.dptr, key.dptr, data.dsize);
  76 }
  77
  78 /* Returns 0 on fail.  On success, return offset of record, and fills
  79    in rec */
  80 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
  81                         struct list_struct *r)
  82 {
  83         tdb_off_t rec_ptr;
  84
  85         /* read in the hash top */
  86         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
  87                 return 0;
  88
  89         /* keep looking until we find the right record */
  90         while (rec_ptr) {
  91                 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
  92                         return 0;
  93
  94                 if (!TDB_DEAD(r) && hash==r->full_hash
  95                     && key.dsize==r->key_len
  96                     && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
  97                                       r->key_len, tdb_key_compare,
  98                                       NULL) == 0) {
  99                         return rec_ptr;
 100                 }
 101                 rec_ptr = r->next;
 102         }
 103         return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
 104 }
 105
 106 /* As tdb_find, but if you succeed, keep the lock */
 107 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
 108                            struct list_struct *rec)
 109 {
 110         uint32_t rec_ptr;
 111
 112         if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
 113                 return 0;
 114         if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
 115                 tdb_unlock(tdb, BUCKET(hash), locktype);
 116         return rec_ptr;
 117 }
 118
 119
 120 /* update an entry in place - this only works if the new data size
 121    is <= the old data size and the key exists.
 122    on failure return -1.
 123 */
 124 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
 125 {
 126         struct list_struct rec;
 127         tdb_off_t rec_ptr;
 128
 129         /* find entry */
 130         if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
 131                 return -1;
 132
 133         /* must be long enough key, data and tailer */
 134         if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
 135                 tdb->ecode = TDB_SUCCESS; /* Not really an error */
 136                 return -1;
 137         }
 138
 139         if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 140                       dbuf.dptr, dbuf.dsize) == -1)
 141                 return -1;
 142
 143         if (dbuf.dsize != rec.data_len) {
 144                 /* update size */
 145                 rec.data_len = dbuf.dsize;
 146                 return tdb_rec_write(tdb, rec_ptr, &rec);
 147         }
 148
 149         return 0;
 150 }
 151
 152 /* find an entry in the database given a key */
 153 /* If an entry doesn't exist tdb_err will be set to
 154  * TDB_ERR_NOEXIST. If a key has no data attached
 155  * then the TDB_DATA will have zero length but
 156  * a non-zero pointer
 157  */
 158 TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
 159 {
 160         tdb_off_t rec_ptr;
 161         struct list_struct rec;
 162         TDB_DATA ret;
 163         uint32_t hash;
 164
 165         /* find which hash bucket it is in */
 166         hash = tdb->hash_fn(&key);
 167         if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 168                 tdb_trace(tdb, "tdb_fetch ");
 169                 tdb_trace_record(tdb, key);
 170                 tdb_trace(tdb, "= ENOENT\n");
 171                 return tdb_null;
 172         }
 173         ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 174                                   rec.data_len);
 175         ret.dsize = rec.data_len;
 176         tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
 177         tdb_trace(tdb, "tdb_fetch ");
 178         tdb_trace_record(tdb, key);
 179         tdb_trace(tdb, "= ");
 180         tdb_trace_record(tdb, ret);
 181         tdb_trace(tdb, "\n");
 182         return ret;
 183 }
 184
 185 /*
 186  * Find an entry in the database and hand the record's data to a parsing
 187  * function. The parsing function is executed under the chain read lock, so it
 188  * should be fast and should not block on other syscalls.
 189  *
 190  * DONT CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
 191  *
 192  * For mmapped tdb's that do not have a transaction open it points the parsing
 193  * function directly at the mmap area, it avoids the malloc/memcpy in this
 194  * case. If a transaction is open or no mmap is available, it has to do
 195  * malloc/read/parse/free.
 196  *
 197  * This is interesting for all readers of potentially large data structures in
 198  * the tdb records, ldb indexes being one example.
 199  */
 200
 201 int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
 202                      int (*parser)(TDB_DATA key, TDB_DATA data,
 203                                    void *private_data),
 204                      void *private_data)
 205 {
 206         tdb_off_t rec_ptr;
 207         struct list_struct rec;
 208         int ret;
 209         uint32_t hash;
 210
 211         /* find which hash bucket it is in */
 212         hash = tdb->hash_fn(&key);
 213
 214         if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 215                 tdb_trace(tdb, "tdb_parse_record ");
 216                 tdb_trace_record(tdb, key);
 217                 tdb_trace(tdb, "= ENOENT\n");
 218                 return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
 219         }
 220
 221         tdb_trace(tdb, "tdb_parse_record ");
 222         tdb_trace_record(tdb, key);
 223         tdb_trace(tdb, "= %u\n", rec.data_len);
 224
 225         ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
 226                              rec.data_len, parser, private_data);
 227
 228         tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
 229
 230         return ret;
 231 }
 232
 233 /* check if an entry in the database exists
 234
 235    note that 1 is returned if the key is found and 0 is returned if not found
 236    this doesn't match the conventions in the rest of this module, but is
 237    compatible with gdbm
 238 */
 239 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 240 {
 241         struct list_struct rec;
 242
 243         if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
 244                 return 0;
 245         tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
 246         return 1;
 247 }
 248
 249 int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
 250 {
 251         uint32_t hash = tdb->hash_fn(&key);
 252         int ret;
 253
 254         ret = tdb_exists_hash(tdb, key, hash);
 255         tdb_trace(tdb, "tdb_exists ");
 256         tdb_trace_record(tdb, key);
 257         tdb_trace(tdb, "= %i\n", ret);
 258         return ret;
 259 }
 260
 261 /* actually delete an entry in the database given the offset */
 262 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct *rec)
 263 {
 264         tdb_off_t last_ptr, i;
 265         struct list_struct lastrec;
 266
 267         if (tdb->read_only || tdb->traverse_read) return -1;
 268
 269         if (tdb->traverse_write != 0 ||
 270             tdb_write_lock_record(tdb, rec_ptr) == -1) {
 271                 /* Someone traversing here: mark it as dead */
 272                 rec->magic = TDB_DEAD_MAGIC;
 273                 return tdb_rec_write(tdb, rec_ptr, rec);
 274         }
 275         if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
 276                 return -1;
 277
 278         /* find previous record in hash chain */
 279         if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
 280                 return -1;
 281         for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
 282                 if (tdb_rec_read(tdb, i, &lastrec) == -1)
 283                         return -1;
 284
 285         /* unlink it: next ptr is at start of record. */
 286         if (last_ptr == 0)
 287                 last_ptr = TDB_HASH_TOP(rec->full_hash);
 288         if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
 289                 return -1;
 290
 291         /* recover the space */
 292         if (tdb_free(tdb, rec_ptr, rec) == -1)
 293                 return -1;
 294         return 0;
 295 }
 296
 297 static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash)
 298 {
 299         int res = 0;
 300         tdb_off_t rec_ptr;
 301         struct list_struct rec;
 302
 303         /* read in the hash top */
 304         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
 305                 return 0;
 306
 307         while (rec_ptr) {
 308                 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
 309                         return 0;
 310
 311                 if (rec.magic == TDB_DEAD_MAGIC) {
 312                         res += 1;
 313                 }
 314                 rec_ptr = rec.next;
 315         }
 316         return res;
 317 }
 318
 319 /*
 320  * Purge all DEAD records from a hash chain
 321  */
 322 static int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
 323 {
 324         int res = -1;
 325         struct list_struct rec;
 326         tdb_off_t rec_ptr;
 327
 328         if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
 329                 return -1;
 330         }
 331
 332         /* read in the hash top */
 333         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
 334                 goto fail;
 335
 336         while (rec_ptr) {
 337                 tdb_off_t next;
 338
 339                 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
 340                         goto fail;
 341                 }
 342
 343                 next = rec.next;
 344
 345                 if (rec.magic == TDB_DEAD_MAGIC
 346                     && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
 347                         goto fail;
 348                 }
 349                 rec_ptr = next;
 350         }
 351         res = 0;
 352  fail:
 353         tdb_unlock(tdb, -1, F_WRLCK);
 354         return res;
 355 }
 356
 357 /* delete an entry in the database given a key */
 358 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 359 {
 360         tdb_off_t rec_ptr;
 361         struct list_struct rec;
 362         int ret;
 363
 364         if (tdb->max_dead_records != 0) {
 365
 366                 /*
 367                  * Allow for some dead records per hash chain, mainly for
 368                  * tdb's with a very high create/delete rate like locking.tdb.
 369                  */
 370
 371                 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 372                         return -1;
 373
 374                 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
 375                         /*
 376                          * Don't let the per-chain freelist grow too large,
 377                          * delete all existing dead records
 378                          */
 379                         tdb_purge_dead(tdb, hash);
 380                 }
 381
 382                 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {
 383                         tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
 384                         return -1;
 385                 }
 386
 387                 /*
 388                  * Just mark the record as dead.
 389                  */
 390                 rec.magic = TDB_DEAD_MAGIC;
 391                 ret = tdb_rec_write(tdb, rec_ptr, &rec);
 392         }
 393         else {
 394                 if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK,
 395                                                    &rec)))
 396                         return -1;
 397
 398                 ret = tdb_do_delete(tdb, rec_ptr, &rec);
 399         }
 400
 401         if (ret == 0) {
 402                 tdb_increment_seqnum(tdb);
 403         }
 404
 405         if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0)
 406                 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
 407         return ret;
 408 }
 409
 410 int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
 411 {
 412         uint32_t hash = tdb->hash_fn(&key);
 413         int ret;
 414
 415         ret = tdb_delete_hash(tdb, key, hash);
 416         tdb_trace(tdb, "tdb_delete ");
 417         tdb_trace_record(tdb, key);
 418         tdb_trace(tdb, "= %s\n", ret ? "ENOENT" : "0");
 419         return ret;
 420 }
 421
 422 /*
 423  * See if we have a dead record around with enough space
 424  */
 425 static tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
 426                                struct list_struct *r, tdb_len_t length)
 427 {
 428         tdb_off_t rec_ptr;
 429
 430         /* read in the hash top */
 431         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
 432                 return 0;
 433
 434         /* keep looking until we find the right record */
 435         while (rec_ptr) {
 436                 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
 437                         return 0;
 438
 439                 if (TDB_DEAD(r) && r->rec_len >= length) {
 440                         /*
 441                          * First fit for simple coding, TODO: change to best
 442                          * fit
 443                          */
 444                         return rec_ptr;
 445                 }
 446                 rec_ptr = r->next;
 447         }
 448         return 0;
 449 }
 450
 451 /* store an element in the database, replacing any existing element
 452    with the same key
 453
 454    return 0 on success, -1 on failure
 455 */
 456 int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 457 {
 458         struct list_struct rec;
 459         uint32_t hash;
 460         tdb_off_t rec_ptr;
 461         char *p = NULL;
 462         int ret = -1;
 463
 464         if (tdb->read_only || tdb->traverse_read) {
 465                 tdb->ecode = TDB_ERR_RDONLY;
 466                 return -1;
 467         }
 468
 469         /* find which hash bucket it is in */
 470         hash = tdb->hash_fn(&key);
 471         if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 472                 return -1;
 473
 474         tdb_trace(tdb, "tdb_store %s ", flag == TDB_INSERT ? "insert" :
 475                   flag == TDB_MODIFY ? "modify" : "normal");
 476         tdb_trace_record(tdb, key);
 477         tdb_trace_record(tdb, dbuf);
 478
 479         /* check for it existing, on insert. */
 480         if (flag == TDB_INSERT) {
 481                 if (tdb_exists_hash(tdb, key, hash)) {
 482                         tdb->ecode = TDB_ERR_EXISTS;
 483                         tdb_trace(tdb, "= EEXIST\n");
 484                         goto fail;
 485                 }
 486         } else {
 487                 /* first try in-place update, on modify or replace. */
 488                 if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
 489                         tdb_trace(tdb, "= inplace\n");
 490                         goto done;
 491                 }
 492                 if (tdb->ecode == TDB_ERR_NOEXIST &&
 493                     flag == TDB_MODIFY) {
 494                         /* if the record doesn't exist and we are in TDB_MODIFY mode then
 495                          we should fail the store */
 496                         tdb_trace(tdb, "= ENOENT\n");
 497                         goto fail;
 498                 }
 499         }
 500         /* reset the error code potentially set by the tdb_update() */
 501         tdb->ecode = TDB_SUCCESS;
 502
 503         /* delete any existing record - if it doesn't exist we don't
 504            care.  Doing this first reduces fragmentation, and avoids
 505            coalescing with `allocated' block before it's updated. */
 506         if (flag != TDB_INSERT)
 507                 tdb_delete_hash(tdb, key, hash);
 508
 509         /* Copy key+value *before* allocating free space in case malloc
 510            fails and we are left with a dead spot in the tdb. */
 511
 512         if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
 513                 tdb->ecode = TDB_ERR_OOM;
 514                 goto fail;
 515         }
 516
 517         memcpy(p, key.dptr, key.dsize);
 518         if (dbuf.dsize)
 519                 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
 520
 521         if (tdb->max_dead_records != 0) {
 522                 /*
 523                  * Allow for some dead records per hash chain, look if we can
 524                  * find one that can hold the new record. We need enough space
 525                  * for key, data and tailer. If we find one, we don't have to
 526                  * consult the central freelist.
 527                  */
 528                 rec_ptr = tdb_find_dead(
 529                         tdb, hash, &rec,
 530                         key.dsize + dbuf.dsize + sizeof(tdb_off_t));
 531
 532                 if (rec_ptr != 0) {
 533                         rec.key_len = key.dsize;
 534                         rec.data_len = dbuf.dsize;
 535                         rec.full_hash = hash;
 536                         rec.magic = TDB_MAGIC;
 537                         if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
 538                             || tdb->methods->tdb_write(
 539                                     tdb, rec_ptr + sizeof(rec),
 540                                     p, key.dsize + dbuf.dsize) == -1) {
 541                                 goto fail;
 542                         }
 543                         goto done;
 544                         tdb_trace(tdb, "= fromdead\n");
 545                 }
 546         }
 547
 548         /*
 549          * We have to allocate some space from the freelist, so this means we
 550          * have to lock it. Use the chance to purge all the DEAD records from
 551          * the hash chain under the freelist lock.
 552          */
 553
 554         if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
 555                 goto fail;
 556         }
 557
 558         if ((tdb->max_dead_records != 0)
 559             && (tdb_purge_dead(tdb, hash) == -1)) {
 560                 tdb_unlock(tdb, -1, F_WRLCK);
 561                 goto fail;
 562         }
 563
 564         /* we have to allocate some space */
 565         rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec);
 566
 567         tdb_unlock(tdb, -1, F_WRLCK);
 568
 569         if (rec_ptr == 0) {
 570                 goto fail;
 571         }
 572
 573         /* Read hash top into next ptr */
 574         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
 575                 goto fail;
 576
 577         rec.key_len = key.dsize;
 578         rec.data_len = dbuf.dsize;
 579         rec.full_hash = hash;
 580         rec.magic = TDB_MAGIC;
 581
 582         tdb_trace(tdb, "= allocate\n");
 583
 584         /* write out and point the top of the hash chain at it */
 585         if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
 586             || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
 587             || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
 588                 /* Need to tdb_unallocate() here */
 589                 goto fail;
 590         }
 591
 592  done:
 593         ret = 0;
 594  fail:
 595         if (ret == 0) {
 596                 tdb_increment_seqnum(tdb);
 597         }
 598
 599         SAFE_FREE(p);
 600         tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
 601         return ret;
 602 }
 603
 604
 605 /* Append to an entry. Create if not exist. */
 606 int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
 607 {
 608         uint32_t hash;
 609         TDB_DATA dbuf;
 610         int ret = -1;
 611
 612         /* find which hash bucket it is in */
 613         hash = tdb->hash_fn(&key);
 614         if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 615                 return -1;
 616
 617         dbuf = tdb_fetch(tdb, key);
 618         tdb_trace(tdb, "tdb_append ");
 619         tdb_trace_record(tdb, key);
 620         tdb_trace_record(tdb, dbuf);
 621         tdb_trace(tdb, "= %s\n", dbuf.dptr ? "insert" : "append");
 622
 623         if (dbuf.dptr == NULL) {
 624                 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
 625         } else {
 626                 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
 627                 unsigned char *new_dptr;
 628
 629                 /* realloc '0' is special: don't do that. */
 630                 if (new_len == 0)
 631                         new_len = 1;
 632                 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
 633                 if (new_dptr == NULL) {
 634                         free(dbuf.dptr);
 635                 }
 636                 dbuf.dptr = new_dptr;
 637         }
 638
 639         if (dbuf.dptr == NULL) {
 640                 tdb->ecode = TDB_ERR_OOM;
 641                 goto failed;
 642         }
 643
 644         memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
 645         dbuf.dsize += new_dbuf.dsize;
 646
 647         ret = tdb_store(tdb, key, dbuf, 0);
 648
 649 failed:
 650         tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
 651         SAFE_FREE(dbuf.dptr);
 652         return ret;
 653 }
 654
 655
 656 /*
 657   return the name of the current tdb file
 658   useful for external logging functions
 659 */
 660 const char *tdb_name(struct tdb_context *tdb)
 661 {
 662         return tdb->name;
 663 }
 664
 665 /*
 666   return the underlying file descriptor being used by tdb, or -1
 667   useful for external routines that want to check the device/inode
 668   of the fd
 669 */
 670 int tdb_fd(struct tdb_context *tdb)
 671 {
 672         return tdb->fd;
 673 }
 674
 675 /*
 676   return the current logging function
 677   useful for external tdb routines that wish to log tdb errors
 678 */
 679 tdb_log_func tdb_log_fn(struct tdb_context *tdb)
 680 {
 681         return tdb->log.log_fn;
 682 }
 683
 684
 685 /*
 686   get the tdb sequence number. Only makes sense if the writers opened
 687   with TDB_SEQNUM set. Note that this sequence number will wrap quite
 688   quickly, so it should only be used for a 'has something changed'
 689   test, not for code that relies on the count of the number of changes
 690   made. If you want a counter then use a tdb record.
 691
 692   The aim of this sequence number is to allow for a very lightweight
 693   test of a possible tdb change.
 694 */
 695 int tdb_get_seqnum(struct tdb_context *tdb)
 696 {
 697         tdb_off_t seqnum=0;
 698
 699         tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
 700         tdb_trace(tdb, "tdb_get_seqnum = %u\n", seqnum);
 701         return seqnum;
 702 }
 703
 704 int tdb_hash_size(struct tdb_context *tdb)
 705 {
 706         return tdb->header.hash_size;
 707 }
 708
 709 size_t tdb_map_size(struct tdb_context *tdb)
 710 {
 711         return tdb->map_size;
 712 }
 713
 714 int tdb_get_flags(struct tdb_context *tdb)
 715 {
 716         return tdb->flags;
 717 }
 718
 719 void tdb_add_flags(struct tdb_context *tdb, unsigned flags)
 720 {
 721         tdb->flags |= flags;
 722 }
 723
 724 void tdb_remove_flags(struct tdb_context *tdb, unsigned flags)
 725 {
 726         tdb->flags &= ~flags;
 727 }
 728
 729
 730 /*
 731   enable sequence number handling on an open tdb
 732 */
 733 void tdb_enable_seqnum(struct tdb_context *tdb)
 734 {
 735         tdb->flags |= TDB_SEQNUM;
 736 }
 737
 738
 739 /*
 740   add a region of the file to the freelist. Length is the size of the region in bytes,
 741   which includes the free list header that needs to be added
 742  */
 743 static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t length)
 744 {
 745         struct list_struct rec;
 746         if (length <= sizeof(rec)) {
 747                 /* the region is not worth adding */
 748                 return 0;
 749         }
 750         if (length + offset > tdb->map_size) {
 751                 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n"));
 752                 return -1;
 753         }
 754         memset(&rec,'\0',sizeof(rec));
 755         rec.rec_len = length - sizeof(rec);
 756         if (tdb_free(tdb, offset, &rec) == -1) {
 757                 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: failed to add free record\n"));
 758                 return -1;
 759         }
 760         return 0;
 761 }
 762
 763 /*
 764   wipe the entire database, deleting all records. This can be done
 765   very fast by using a global lock. The entire data portion of the
 766   file becomes a single entry in the freelist.
 767
 768   This code carefully steps around the recovery area, leaving it alone
 769  */
 770 int tdb_wipe_all(struct tdb_context *tdb)
 771 {
 772         int i;
 773         tdb_off_t offset = 0;
 774         ssize_t data_len;
 775         tdb_off_t recovery_head;
 776         tdb_len_t recovery_size = 0;
 777
 778         if (tdb_lockall(tdb) != 0) {
 779                 return -1;
 780         }
 781
 782         tdb_trace(tdb, "tdb_wipe_all\n");
 783
 784         /* see if the tdb has a recovery area, and remember its size
 785            if so. We don't want to lose this as otherwise each
 786            tdb_wipe_all() in a transaction will increase the size of
 787            the tdb by the size of the recovery area */
 788         if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
 789                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n"));
 790                 goto failed;
 791         }
 792
 793         if (recovery_head != 0) {
 794                 struct list_struct rec;
 795                 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
 796                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n"));
 797                         return -1;
 798                 }
 799                 recovery_size = rec.rec_len + sizeof(rec);
 800         }
 801
 802         /* wipe the hashes */
 803         for (i=0;i<tdb->header.hash_size;i++) {
 804                 if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
 805                         TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i));
 806                         goto failed;
 807                 }
 808         }
 809
 810         /* wipe the freelist */
 811         if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
 812                 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n"));
 813                 goto failed;
 814         }
 815
 816         /* add all the rest of the file to the freelist, possibly leaving a gap
 817            for the recovery area */
 818         if (recovery_size == 0) {
 819                 /* the simple case - the whole file can be used as a freelist */
 820                 data_len = (tdb->map_size - TDB_DATA_START(tdb->header.hash_size));
 821                 if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
 822                         goto failed;
 823                 }
 824         } else {
 825                 /* we need to add two freelist entries - one on either
 826                    side of the recovery area
 827
 828                    Note that we cannot shift the recovery area during
 829                    this operation. Only the transaction.c code may
 830                    move the recovery area or we risk subtle data
 831                    corruption
 832                 */
 833                 data_len = (recovery_head - TDB_DATA_START(tdb->header.hash_size));
 834                 if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
 835                         goto failed;
 836                 }
 837                 /* and the 2nd free list entry after the recovery area - if any */
 838                 data_len = tdb->map_size - (recovery_head+recovery_size);
 839                 if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
 840                         goto failed;
 841                 }
 842         }
 843
 844         if (tdb_unlockall(tdb) != 0) {
 845                 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
 846                 goto failed;
 847         }
 848
 849         return 0;
 850
 851 failed:
 852         tdb_unlockall(tdb);
 853         return -1;
 854 }
 855
 856 #ifdef TDB_TRACE
 857 #include <stdarg.h>
 858
 859 void tdb_trace(const struct tdb_context *tdb, const char *fmt, ...)
 860 {
 861         char msg[256];
 862         va_list args;
 863         int len, err;
 864
 865         va_start(args, fmt);
 866         len = vsprintf(msg, fmt, args);
 867         va_end(args);
 868
 869         err = write(tdb->tracefd, msg, len);
 870 }
 871
 872 void tdb_trace_record(const struct tdb_context *tdb, TDB_DATA rec)
 873 {
 874         char msg[20];
 875         unsigned int i;
 876         int err;
 877
 878         err = write(tdb->tracefd, msg, sprintf(msg, "%zu:", rec.dsize));
 879         for (i = 0; i < rec.dsize; i++)
 880                 err += write(tdb->tracefd, msg, sprintf(msg, "%02x",
 881                                                         rec.dptr[i]));
 882         err += write(tdb->tracefd, " ", 1);
 883 }
 884 #endif