git.ozlabs.org Git - ccan/blob - ccan/tdb/tdb.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library
   5
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 3 of the License, or (at your option) any later version.
  18
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, see <http://www.gnu.org/licenses/>.
  26 */
  27
  28 #include "tdb_private.h"
  29
/* Shared TDB_DATA value handed back by _tdb_fetch() when no record is found. */
TDB_DATA tdb_null;
  31
  32 /*
  33   non-blocking increment of the tdb sequence number if the tdb has been opened using
  34   the TDB_SEQNUM flag
  35 */
  36 void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
  37 {
  38         tdb_off_t seqnum=0;
  39
  40         if (!(tdb->flags & TDB_SEQNUM)) {
  41                 return;
  42         }
  43
  44         /* we ignore errors from this, as we have no sane way of
  45            dealing with them.
  46         */
  47         tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
  48         seqnum++;
  49         tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
  50 }
  51
  52 /*
  53   increment the tdb sequence number if the tdb has been opened using
  54   the TDB_SEQNUM flag
  55 */
  56 static void tdb_increment_seqnum(struct tdb_context *tdb)
  57 {
  58         if (!(tdb->flags & TDB_SEQNUM)) {
  59                 return;
  60         }
  61
  62         if (tdb_brlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, F_SETLKW, 1, 1) != 0) {
  63                 return;
  64         }
  65
  66         tdb_increment_seqnum_nonblock(tdb);
  67
  68         tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1, 1);
  69 }
  70
  71 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
  72 {
  73         return memcmp(data.dptr, key.dptr, data.dsize);
  74 }
  75
  76 /* Returns 0 on fail.  On success, return offset of record, and fills
  77    in rec */
  78 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
  79                         struct list_struct *r)
  80 {
  81         tdb_off_t rec_ptr;
  82
  83         /* read in the hash top */
  84         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
  85                 return 0;
  86
  87         /* keep looking until we find the right record */
  88         while (rec_ptr) {
  89                 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
  90                         return 0;
  91
  92                 if (!TDB_DEAD(r) && hash==r->full_hash
  93                     && key.dsize==r->key_len
  94                     && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
  95                                       r->key_len, tdb_key_compare,
  96                                       NULL) == 0) {
  97                         return rec_ptr;
  98                 }
  99                 /* detect tight infinite loop */
 100                 if (rec_ptr == r->next) {
 101                         tdb->ecode = TDB_ERR_CORRUPT;
 102                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_find: loop detected.\n"));
 103                         return 0;
 104                 }
 105                 rec_ptr = r->next;
 106         }
 107         tdb->ecode = TDB_ERR_NOEXIST;
 108         return 0;
 109 }
 110
 111 /* As tdb_find, but if you succeed, keep the lock */
 112 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
 113                            struct list_struct *rec)
 114 {
 115         uint32_t rec_ptr;
 116
 117         if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
 118                 return 0;
 119         if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
 120                 tdb_unlock(tdb, BUCKET(hash), locktype);
 121         return rec_ptr;
 122 }
 123
 124
 125 /* update an entry in place - this only works if the new data size
 126    is <= the old data size and the key exists.
 127    on failure return -1.
 128 */
 129 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
 130 {
 131         struct list_struct rec;
 132         tdb_off_t rec_ptr;
 133
 134         /* find entry */
 135         if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
 136                 return -1;
 137
 138         /* must be long enough key, data and tailer */
 139         if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
 140                 tdb->ecode = TDB_SUCCESS; /* Not really an error */
 141                 return -1;
 142         }
 143
 144         if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 145                       dbuf.dptr, dbuf.dsize) == -1)
 146                 return -1;
 147
 148         if (dbuf.dsize != rec.data_len) {
 149                 /* update size */
 150                 rec.data_len = dbuf.dsize;
 151                 return tdb_rec_write(tdb, rec_ptr, &rec);
 152         }
 153
 154         return 0;
 155 }
 156
 157 /* find an entry in the database given a key */
 158 /* If an entry doesn't exist tdb_err will be set to
 159  * TDB_ERR_NOEXIST. If a key has no data attached
 160  * then the TDB_DATA will have zero length but
 161  * a non-zero pointer
 162  */
 163 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
 164 {
 165         tdb_off_t rec_ptr;
 166         struct list_struct rec;
 167         TDB_DATA ret;
 168         uint32_t hash;
 169
 170         /* find which hash bucket it is in */
 171         hash = tdb->hash_fn(&key);
 172         if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 173                 return tdb_null;
 174         }
 175         ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 176                                   rec.data_len);
 177         ret.dsize = rec.data_len;
 178         tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
 179         return ret;
 180 }
 181
 182 TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
 183 {
 184         TDB_DATA ret = _tdb_fetch(tdb, key);
 185
 186         tdb_trace_1rec_retrec(tdb, "tdb_fetch", key, ret);
 187         return ret;
 188 }
 189
 190 /*
 191  * Find an entry in the database and hand the record's data to a parsing
 192  * function. The parsing function is executed under the chain read lock, so it
 193  * should be fast and should not block on other syscalls.
 194  *
 195  * DONT CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
 196  *
 197  * For mmapped tdb's that do not have a transaction open it points the parsing
 198  * function directly at the mmap area, it avoids the malloc/memcpy in this
 199  * case. If a transaction is open or no mmap is available, it has to do
 200  * malloc/read/parse/free.
 201  *
 202  * This is interesting for all readers of potentially large data structures in
 203  * the tdb records, ldb indexes being one example.
 204  */
 205
 206 int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
 207                      int (*parser)(TDB_DATA key, TDB_DATA data,
 208                                    void *private_data),
 209                      void *private_data)
 210 {
 211         tdb_off_t rec_ptr;
 212         struct list_struct rec;
 213         int ret;
 214         uint32_t hash;
 215
 216         /* find which hash bucket it is in */
 217         hash = tdb->hash_fn(&key);
 218
 219         if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 220                 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key,
 221                                    -TDB_ERR_NOEXIST);
 222                 tdb->ecode = TDB_ERR_NOEXIST;
 223                 return 0;
 224         }
 225         tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, 0);
 226
 227         ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
 228                              rec.data_len, parser, private_data);
 229
 230         tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
 231
 232         return ret;
 233 }
 234
 235 /* check if an entry in the database exists
 236
 237    note that 1 is returned if the key is found and 0 is returned if not found
 238    this doesn't match the conventions in the rest of this module, but is
 239    compatible with gdbm
 240 */
 241 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 242 {
 243         struct list_struct rec;
 244
 245         if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
 246                 return 0;
 247         tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
 248         return 1;
 249 }
 250
 251 int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
 252 {
 253         uint32_t hash = tdb->hash_fn(&key);
 254         int ret;
 255
 256         ret = tdb_exists_hash(tdb, key, hash);
 257         tdb_trace_1rec_ret(tdb, "tdb_exists", key, ret);
 258         return ret;
 259 }
 260
 261 /* actually delete an entry in the database given the offset */
 262 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct *rec)
 263 {
 264         tdb_off_t last_ptr, i;
 265         struct list_struct lastrec;
 266
 267         if (tdb->read_only || tdb->traverse_read) return -1;
 268
 269         if (tdb->traverse_write != 0 ||
 270             tdb_write_lock_record(tdb, rec_ptr) == -1) {
 271                 /* Someone traversing here: mark it as dead */
 272                 rec->magic = TDB_DEAD_MAGIC;
 273                 return tdb_rec_write(tdb, rec_ptr, rec);
 274         }
 275         if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
 276                 return -1;
 277
 278         /* find previous record in hash chain */
 279         if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
 280                 return -1;
 281         for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
 282                 if (tdb_rec_read(tdb, i, &lastrec) == -1)
 283                         return -1;
 284
 285         /* unlink it: next ptr is at start of record. */
 286         if (last_ptr == 0)
 287                 last_ptr = TDB_HASH_TOP(rec->full_hash);
 288         if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
 289                 return -1;
 290
 291         /* recover the space */
 292         if (tdb_free(tdb, rec_ptr, rec) == -1)
 293                 return -1;
 294         return 0;
 295 }
 296
 297 static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash)
 298 {
 299         int res = 0;
 300         tdb_off_t rec_ptr;
 301         struct list_struct rec;
 302
 303         /* read in the hash top */
 304         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
 305                 return 0;
 306
 307         while (rec_ptr) {
 308                 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
 309                         return 0;
 310
 311                 if (rec.magic == TDB_DEAD_MAGIC) {
 312                         res += 1;
 313                 }
 314                 rec_ptr = rec.next;
 315         }
 316         return res;
 317 }
 318
 319 /*
 320  * Purge all DEAD records from a hash chain
 321  */
 322 static int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
 323 {
 324         int res = -1;
 325         struct list_struct rec;
 326         tdb_off_t rec_ptr;
 327
 328         if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
 329                 return -1;
 330         }
 331
 332         /* read in the hash top */
 333         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
 334                 goto fail;
 335
 336         while (rec_ptr) {
 337                 tdb_off_t next;
 338
 339                 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
 340                         goto fail;
 341                 }
 342
 343                 next = rec.next;
 344
 345                 if (rec.magic == TDB_DEAD_MAGIC
 346                     && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
 347                         goto fail;
 348                 }
 349                 rec_ptr = next;
 350         }
 351         res = 0;
 352  fail:
 353         tdb_unlock(tdb, -1, F_WRLCK);
 354         return res;
 355 }
 356
 357 /* delete an entry in the database given a key */
 358 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 359 {
 360         tdb_off_t rec_ptr;
 361         struct list_struct rec;
 362         int ret;
 363
 364         if (tdb->max_dead_records != 0) {
 365
 366                 /*
 367                  * Allow for some dead records per hash chain, mainly for
 368                  * tdb's with a very high create/delete rate like locking.tdb.
 369                  */
 370
 371                 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 372                         return -1;
 373
 374                 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
 375                         /*
 376                          * Don't let the per-chain freelist grow too large,
 377                          * delete all existing dead records
 378                          */
 379                         tdb_purge_dead(tdb, hash);
 380                 }
 381
 382                 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {
 383                         tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
 384                         return -1;
 385                 }
 386
 387                 /*
 388                  * Just mark the record as dead.
 389                  */
 390                 rec.magic = TDB_DEAD_MAGIC;
 391                 ret = tdb_rec_write(tdb, rec_ptr, &rec);
 392         }
 393         else {
 394                 if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK,
 395                                                    &rec)))
 396                         return -1;
 397
 398                 ret = tdb_do_delete(tdb, rec_ptr, &rec);
 399         }
 400
 401         if (ret == 0) {
 402                 tdb_increment_seqnum(tdb);
 403         }
 404
 405         if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0)
 406                 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
 407         return ret;
 408 }
 409
 410 int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
 411 {
 412         uint32_t hash = tdb->hash_fn(&key);
 413         int ret;
 414
 415         ret = tdb_delete_hash(tdb, key, hash);
 416         tdb_trace_1rec_ret(tdb, "tdb_delete", key, ret);
 417         return ret;
 418 }
 419
 420 /*
 421  * See if we have a dead record around with enough space
 422  */
 423 static tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
 424                                struct list_struct *r, tdb_len_t length)
 425 {
 426         tdb_off_t rec_ptr;
 427
 428         /* read in the hash top */
 429         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
 430                 return 0;
 431
 432         /* keep looking until we find the right record */
 433         while (rec_ptr) {
 434                 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
 435                         return 0;
 436
 437                 if (TDB_DEAD(r) && r->rec_len >= length) {
 438                         /*
 439                          * First fit for simple coding, TODO: change to best
 440                          * fit
 441                          */
 442                         return rec_ptr;
 443                 }
 444                 rec_ptr = r->next;
 445         }
 446         return 0;
 447 }
 448
 449 static int _tdb_store(struct tdb_context *tdb, TDB_DATA key,
 450                       TDB_DATA dbuf, int flag, uint32_t hash)
 451 {
 452         struct list_struct rec;
 453         tdb_off_t rec_ptr;
 454         char *p = NULL;
 455         int ret = -1;
 456
 457         /* check for it existing, on insert. */
 458         if (flag == TDB_INSERT) {
 459                 if (tdb_exists_hash(tdb, key, hash)) {
 460                         tdb->ecode = TDB_ERR_EXISTS;
 461                         goto fail;
 462                 }
 463         } else {
 464                 /* first try in-place update, on modify or replace. */
 465                 if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
 466                         goto done;
 467                 }
 468                 if (tdb->ecode == TDB_ERR_NOEXIST &&
 469                     flag == TDB_MODIFY) {
 470                         /* if the record doesn't exist and we are in TDB_MODIFY mode then
 471                          we should fail the store */
 472                         goto fail;
 473                 }
 474         }
 475         /* reset the error code potentially set by the tdb_update() */
 476         tdb->ecode = TDB_SUCCESS;
 477
 478         /* delete any existing record - if it doesn't exist we don't
 479            care.  Doing this first reduces fragmentation, and avoids
 480            coalescing with `allocated' block before it's updated. */
 481         if (flag != TDB_INSERT)
 482                 tdb_delete_hash(tdb, key, hash);
 483
 484         /* Copy key+value *before* allocating free space in case malloc
 485            fails and we are left with a dead spot in the tdb. */
 486
 487         if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
 488                 tdb->ecode = TDB_ERR_OOM;
 489                 goto fail;
 490         }
 491
 492         memcpy(p, key.dptr, key.dsize);
 493         if (dbuf.dsize)
 494                 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
 495
 496         if (tdb->max_dead_records != 0) {
 497                 /*
 498                  * Allow for some dead records per hash chain, look if we can
 499                  * find one that can hold the new record. We need enough space
 500                  * for key, data and tailer. If we find one, we don't have to
 501                  * consult the central freelist.
 502                  */
 503                 rec_ptr = tdb_find_dead(
 504                         tdb, hash, &rec,
 505                         key.dsize + dbuf.dsize + sizeof(tdb_off_t));
 506
 507                 if (rec_ptr != 0) {
 508                         rec.key_len = key.dsize;
 509                         rec.data_len = dbuf.dsize;
 510                         rec.full_hash = hash;
 511                         rec.magic = TDB_MAGIC;
 512                         if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
 513                             || tdb->methods->tdb_write(
 514                                     tdb, rec_ptr + sizeof(rec),
 515                                     p, key.dsize + dbuf.dsize) == -1) {
 516                                 goto fail;
 517                         }
 518                         goto done;
 519                 }
 520         }
 521
 522         /*
 523          * We have to allocate some space from the freelist, so this means we
 524          * have to lock it. Use the chance to purge all the DEAD records from
 525          * the hash chain under the freelist lock.
 526          */
 527
 528         if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
 529                 goto fail;
 530         }
 531
 532         if ((tdb->max_dead_records != 0)
 533             && (tdb_purge_dead(tdb, hash) == -1)) {
 534                 tdb_unlock(tdb, -1, F_WRLCK);
 535                 goto fail;
 536         }
 537
 538         /* we have to allocate some space */
 539         rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec);
 540
 541         tdb_unlock(tdb, -1, F_WRLCK);
 542
 543         if (rec_ptr == 0) {
 544                 goto fail;
 545         }
 546
 547         /* Read hash top into next ptr */
 548         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
 549                 goto fail;
 550
 551         rec.key_len = key.dsize;
 552         rec.data_len = dbuf.dsize;
 553         rec.full_hash = hash;
 554         rec.magic = TDB_MAGIC;
 555
 556         /* write out and point the top of the hash chain at it */
 557         if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
 558             || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
 559             || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
 560                 /* Need to tdb_unallocate() here */
 561                 goto fail;
 562         }
 563
 564  done:
 565         ret = 0;
 566  fail:
 567         if (ret == 0) {
 568                 tdb_increment_seqnum(tdb);
 569         }
 570
 571         SAFE_FREE(p);
 572         return ret;
 573 }
 574
 575 /* store an element in the database, replacing any existing element
 576    with the same key
 577
 578    return 0 on success, -1 on failure
 579 */
 580 int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 581 {
 582         uint32_t hash;
 583         int ret;
 584
 585         if (tdb->read_only || tdb->traverse_read) {
 586                 tdb->ecode = TDB_ERR_RDONLY;
 587                 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag,
 588                                         -TDB_ERR_RDONLY);
 589                 return -1;
 590         }
 591
 592         /* find which hash bucket it is in */
 593         hash = tdb->hash_fn(&key);
 594         if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 595                 return -1;
 596
 597         ret = _tdb_store(tdb, key, dbuf, flag, hash);
 598         tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, ret);
 599         tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
 600         return ret;
 601 }
 602
 603
 604 /* Append to an entry. Create if not exist. */
 605 int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
 606 {
 607         uint32_t hash;
 608         TDB_DATA dbuf;
 609         int ret = -1;
 610
 611         /* find which hash bucket it is in */
 612         hash = tdb->hash_fn(&key);
 613         if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
 614                 return -1;
 615
 616         dbuf = _tdb_fetch(tdb, key);
 617
 618         if (dbuf.dptr == NULL) {
 619                 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
 620         } else {
 621                 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
 622                 unsigned char *new_dptr;
 623
 624                 /* realloc '0' is special: don't do that. */
 625                 if (new_len == 0)
 626                         new_len = 1;
 627                 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
 628                 if (new_dptr == NULL) {
 629                         free(dbuf.dptr);
 630                 }
 631                 dbuf.dptr = new_dptr;
 632         }
 633
 634         if (dbuf.dptr == NULL) {
 635                 tdb->ecode = TDB_ERR_OOM;
 636                 goto failed;
 637         }
 638
 639         memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
 640         dbuf.dsize += new_dbuf.dsize;
 641
 642         ret = _tdb_store(tdb, key, dbuf, 0, hash);
 643         tdb_trace_2rec_retrec(tdb, "tdb_append", key, new_dbuf, dbuf);
 644
 645 failed:
 646         tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
 647         SAFE_FREE(dbuf.dptr);
 648         return ret;
 649 }
 650
 651
 652 /*
 653   return the name of the current tdb file
 654   useful for external logging functions
 655 */
 656 const char *tdb_name(struct tdb_context *tdb)
 657 {
 658         return tdb->name;
 659 }
 660
 661 /*
 662   return the underlying file descriptor being used by tdb, or -1
 663   useful for external routines that want to check the device/inode
 664   of the fd
 665 */
 666 int tdb_fd(struct tdb_context *tdb)
 667 {
 668         return tdb->fd;
 669 }
 670
 671 /*
 672   return the current logging function
 673   useful for external tdb routines that wish to log tdb errors
 674 */
 675 tdb_log_func tdb_log_fn(struct tdb_context *tdb)
 676 {
 677         return tdb->log.log_fn;
 678 }
 679
 680
 681 /*
 682   get the tdb sequence number. Only makes sense if the writers opened
 683   with TDB_SEQNUM set. Note that this sequence number will wrap quite
 684   quickly, so it should only be used for a 'has something changed'
 685   test, not for code that relies on the count of the number of changes
 686   made. If you want a counter then use a tdb record.
 687
 688   The aim of this sequence number is to allow for a very lightweight
 689   test of a possible tdb change.
 690 */
 691 int tdb_get_seqnum(struct tdb_context *tdb)
 692 {
 693         tdb_off_t seqnum=0;
 694
 695         tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
 696         tdb_trace_ret(tdb, "tdb_get_seqnum", seqnum);
 697         return seqnum;
 698 }
 699
 700 int tdb_hash_size(struct tdb_context *tdb)
 701 {
 702         return tdb->header.hash_size;
 703 }
 704
 705 size_t tdb_map_size(struct tdb_context *tdb)
 706 {
 707         return tdb->map_size;
 708 }
 709
 710 int tdb_get_flags(struct tdb_context *tdb)
 711 {
 712         return tdb->flags;
 713 }
 714
 715 void tdb_add_flags(struct tdb_context *tdb, unsigned flags)
 716 {
 717         tdb->flags |= flags;
 718 }
 719
 720 void tdb_remove_flags(struct tdb_context *tdb, unsigned flags)
 721 {
 722         tdb->flags &= ~flags;
 723 }
 724
 725
 726 /*
 727   enable sequence number handling on an open tdb
 728 */
 729 void tdb_enable_seqnum(struct tdb_context *tdb)
 730 {
 731         tdb->flags |= TDB_SEQNUM;
 732 }
 733
 734
 735 /*
 736   add a region of the file to the freelist. Length is the size of the region in bytes,
 737   which includes the free list header that needs to be added
 738  */
 739 static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t length)
 740 {
 741         struct list_struct rec;
 742         if (length <= sizeof(rec)) {
 743                 /* the region is not worth adding */
 744                 return 0;
 745         }
 746         if (length + offset > tdb->map_size) {
 747                 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n"));
 748                 return -1;
 749         }
 750         memset(&rec,'\0',sizeof(rec));
 751         rec.rec_len = length - sizeof(rec);
 752         if (tdb_free(tdb, offset, &rec) == -1) {
 753                 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: failed to add free record\n"));
 754                 return -1;
 755         }
 756         return 0;
 757 }
 758
 759 /*
 760   wipe the entire database, deleting all records. This can be done
 761   very fast by using a global lock. The entire data portion of the
 762   file becomes a single entry in the freelist.
 763
 764   This code carefully steps around the recovery area, leaving it alone
 765  */
 766 int tdb_wipe_all(struct tdb_context *tdb)
 767 {
 768         int i;
 769         tdb_off_t offset = 0;
 770         ssize_t data_len;
 771         tdb_off_t recovery_head;
 772         tdb_len_t recovery_size = 0;
 773
 774         if (tdb_lockall(tdb) != 0) {
 775                 return -1;
 776         }
 777
 778         tdb_trace(tdb, "tdb_wipe_all");
 779
 780         /* see if the tdb has a recovery area, and remember its size
 781            if so. We don't want to lose this as otherwise each
 782            tdb_wipe_all() in a transaction will increase the size of
 783            the tdb by the size of the recovery area */
 784         if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
 785                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n"));
 786                 goto failed;
 787         }
 788
 789         if (recovery_head != 0) {
 790                 struct list_struct rec;
 791                 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
 792                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n"));
 793                         return -1;
 794                 }
 795                 recovery_size = rec.rec_len + sizeof(rec);
 796         }
 797
 798         /* wipe the hashes */
 799         for (i=0;i<tdb->header.hash_size;i++) {
 800                 if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
 801                         TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i));
 802                         goto failed;
 803                 }
 804         }
 805
 806         /* wipe the freelist */
 807         if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
 808                 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n"));
 809                 goto failed;
 810         }
 811
 812         /* add all the rest of the file to the freelist, possibly leaving a gap
 813            for the recovery area */
 814         if (recovery_size == 0) {
 815                 /* the simple case - the whole file can be used as a freelist */
 816                 data_len = (tdb->map_size - TDB_DATA_START(tdb->header.hash_size));
 817                 if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
 818                         goto failed;
 819                 }
 820         } else {
 821                 /* we need to add two freelist entries - one on either
 822                    side of the recovery area
 823
 824                    Note that we cannot shift the recovery area during
 825                    this operation. Only the transaction.c code may
 826                    move the recovery area or we risk subtle data
 827                    corruption
 828                 */
 829                 data_len = (recovery_head - TDB_DATA_START(tdb->header.hash_size));
 830                 if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
 831                         goto failed;
 832                 }
 833                 /* and the 2nd free list entry after the recovery area - if any */
 834                 data_len = tdb->map_size - (recovery_head+recovery_size);
 835                 if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
 836                         goto failed;
 837                 }
 838         }
 839
 840         if (tdb_unlockall(tdb) != 0) {
 841                 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
 842                 goto failed;
 843         }
 844
 845         return 0;
 846
 847 failed:
 848         tdb_unlockall(tdb);
 849         return -1;
 850 }
 851
 852
 853 struct traverse_state {
 854         bool error;
 855         struct tdb_context *dest_db;
 856 };
 857
 858 /*
 859   traverse function for repacking
 860  */
 861 static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private)
 862 {
 863         struct traverse_state *state = (struct traverse_state *)private;
 864         if (tdb_store(state->dest_db, key, data, TDB_INSERT) != 0) {
 865                 state->error = true;
 866                 return -1;
 867         }
 868         return 0;
 869 }
 870
 871 /*
 872   repack a tdb
 873  */
 874 int tdb_repack(struct tdb_context *tdb)
 875 {
 876         struct tdb_context *tmp_db;
 877         struct traverse_state state;
 878
 879         tdb_trace(tdb, "tdb_repack");
 880
 881         if (tdb_transaction_start(tdb) != 0) {
 882                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_repack: Failed to start transaction\n"));
 883                 return -1;
 884         }
 885
 886         tmp_db = tdb_open("tmpdb", tdb_hash_size(tdb), TDB_INTERNAL, O_RDWR|O_CREAT, 0);
 887         if (tmp_db == NULL) {
 888                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_repack: Failed to create tmp_db\n"));
 889                 tdb_transaction_cancel(tdb);
 890                 return -1;
 891         }
 892
 893         state.error = false;
 894         state.dest_db = tmp_db;
 895
 896         if (tdb_traverse_read(tdb, repack_traverse, &state) == -1) {
 897                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_repack: Failed to traverse copying out\n"));
 898                 tdb_transaction_cancel(tdb);
 899                 tdb_close(tmp_db);
 900                 return -1;
 901         }
 902
 903         if (state.error) {
 904                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_repack: Error during traversal\n"));
 905                 tdb_transaction_cancel(tdb);
 906                 tdb_close(tmp_db);
 907                 return -1;
 908         }
 909
 910         if (tdb_wipe_all(tdb) != 0) {
 911                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_repack: Failed to wipe database\n"));
 912                 tdb_transaction_cancel(tdb);
 913                 tdb_close(tmp_db);
 914                 return -1;
 915         }
 916
 917         state.error = false;
 918         state.dest_db = tdb;
 919
 920         if (tdb_traverse_read(tmp_db, repack_traverse, &state) == -1) {
 921                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_repack: Failed to traverse copying back\n"));
 922                 tdb_transaction_cancel(tdb);
 923                 tdb_close(tmp_db);
 924                 return -1;
 925         }
 926
 927         if (state.error) {
 928                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_repack: Error during second traversal\n"));
 929                 tdb_transaction_cancel(tdb);
 930                 tdb_close(tmp_db);
 931                 return -1;
 932         }
 933
 934         tdb_close(tmp_db);
 935
 936         if (tdb_transaction_commit(tdb) != 0) {
 937                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_repack: Failed to commit\n"));
 938                 return -1;
 939         }
 940
 941         return 0;
 942 }
 943
 944 #ifdef TDB_TRACE
 945 static void tdb_trace_write(struct tdb_context *tdb, const char *str)
 946 {
 947         if (write(tdb->tracefd, str, strlen(str)) != strlen(str)) {
 948                 close(tdb->tracefd);
 949                 tdb->tracefd = -1;
 950         }
 951 }
 952
 953 static void tdb_trace_start(struct tdb_context *tdb)
 954 {
 955         tdb_off_t seqnum=0;
 956         char msg[sizeof(tdb_off_t) * 4];
 957
 958         tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
 959         sprintf(msg, "%u ", seqnum);
 960         tdb_trace_write(tdb, msg);
 961 }
 962
 963 static void tdb_trace_end(struct tdb_context *tdb)
 964 {
 965         tdb_trace_write(tdb, "\n");
 966 }
 967
 968 static void tdb_trace_end_ret(struct tdb_context *tdb, int ret)
 969 {
 970         char msg[sizeof(ret) * 4];
 971         sprintf(msg, " = %i\n", ret);
 972         tdb_trace_write(tdb, msg);
 973 }
 974
 975 static void tdb_trace_record(struct tdb_context *tdb, TDB_DATA rec)
 976 {
 977         char msg[20 + rec.dsize*2], *p;
 978         unsigned int i;
 979
 980         /* We differentiate zero-length records from non-existent ones. */
 981         if (rec.dptr == NULL) {
 982                 tdb_trace_write(tdb, " NULL");
 983                 return;
 984         }
 985
 986         p = msg;
 987         p += sprintf(p, " %zu:", rec.dsize);
 988         for (i = 0; i < rec.dsize; i++)
 989                 p += sprintf(p, "%02x", rec.dptr[i]);
 990
 991         tdb_trace_write(tdb, msg);
 992 }
 993
 994 void tdb_trace(struct tdb_context *tdb, const char *op)
 995 {
 996         tdb_trace_start(tdb);
 997         tdb_trace_write(tdb, op);
 998         tdb_trace_end(tdb);
 999 }
1000
1001 void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op)
1002 {
1003         char msg[sizeof(tdb_off_t) * 4];
1004
1005         sprintf(msg, "%u ", seqnum);
1006         tdb_trace_write(tdb, msg);
1007         tdb_trace_write(tdb, op);
1008         tdb_trace_end(tdb);
1009 }
1010
/*
  trace an open-style operation together with its numeric arguments

  Emits one line of the form
  "<seqnum> <op> <hash_size> <tdb_flags> <open_flags>", with the two
  flag values printed in "%#x" form.

  op is written to the trace directly rather than being copied into
  the local buffer, so its length cannot overflow msg; only the three
  numeric fields, whose formatted size is bounded, go through the
  buffer.
 */
void tdb_trace_open(struct tdb_context *tdb, const char *op,
                    unsigned hash_size, unsigned tdb_flags, unsigned open_flags)
{
        char msg[128];

        snprintf(msg, sizeof(msg), " %u %#x %#x", hash_size, tdb_flags, open_flags);
        tdb_trace_start(tdb);
        tdb_trace_write(tdb, op);
        tdb_trace_write(tdb, msg);
        tdb_trace_end(tdb);
}
1021
/*
  trace an operation that has no arguments and an integer result

  Emits one line of the form "<seqnum> <op> = <ret>".
 */
void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret)
{
        tdb_trace_start(tdb);
        tdb_trace_write(tdb, op);
        tdb_trace_end_ret(tdb, ret);
}
1028
1029 void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret)
1030 {
1031         tdb_trace_start(tdb);
1032         tdb_trace_write(tdb, op);
1033         tdb_trace_write(tdb, " =");
1034         tdb_trace_record(tdb, ret);
1035         tdb_trace_end(tdb);
1036 }
1037
1038 void tdb_trace_1rec(struct tdb_context *tdb, const char *op,
1039                     TDB_DATA rec)
1040 {
1041         tdb_trace_start(tdb);
1042         tdb_trace_write(tdb, op);
1043         tdb_trace_record(tdb, rec);
1044         tdb_trace_end(tdb);
1045 }
1046
1047 void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op,
1048                         TDB_DATA rec, int ret)
1049 {
1050         tdb_trace_start(tdb);
1051         tdb_trace_write(tdb, op);
1052         tdb_trace_record(tdb, rec);
1053         tdb_trace_end_ret(tdb, ret);
1054 }
1055
1056 void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op,
1057                            TDB_DATA rec, TDB_DATA ret)
1058 {
1059         tdb_trace_start(tdb);
1060         tdb_trace_write(tdb, op);
1061         tdb_trace_record(tdb, rec);
1062         tdb_trace_write(tdb, " =");
1063         tdb_trace_record(tdb, ret);
1064         tdb_trace_end(tdb);
1065 }
1066
1067 void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op,
1068                              TDB_DATA rec1, TDB_DATA rec2, unsigned flag,
1069                              int ret)
1070 {
1071         char msg[sizeof(ret) * 4];
1072
1073         sprintf(msg, " %#x", flag);
1074         tdb_trace_start(tdb);
1075         tdb_trace_write(tdb, op);
1076         tdb_trace_record(tdb, rec1);
1077         tdb_trace_record(tdb, rec2);
1078         tdb_trace_write(tdb, msg);
1079         tdb_trace_end_ret(tdb, ret);
1080 }
1081
1082 void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
1083                            TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret)
1084 {
1085         tdb_trace_start(tdb);
1086         tdb_trace_write(tdb, op);
1087         tdb_trace_record(tdb, rec1);
1088         tdb_trace_record(tdb, rec2);
1089         tdb_trace_write(tdb, " =");
1090         tdb_trace_record(tdb, ret);
1091         tdb_trace_end(tdb);
1092 }
1093 #endif