git.ozlabs.org Git - ccan/blob - ccan/tdb2/tdb1_tdb.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library
   5
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 3 of the License, or (at your option) any later version.
  18
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, see <http://www.gnu.org/licenses/>.
  26 */
  27
  28 #include "tdb1_private.h"
  29 #include <assert.h>
  30
  31 /*
  32   non-blocking increment of the tdb sequence number if the tdb has been opened using
  33   the TDB_SEQNUM flag
  34 */
  35 void tdb1_increment_seqnum_nonblock(struct tdb_context *tdb)
  36 {
  37         tdb1_off_t seqnum=0;
  38
  39         if (!(tdb->flags & TDB_SEQNUM)) {
  40                 return;
  41         }
  42
  43         /* we ignore errors from this, as we have no sane way of
  44            dealing with them.
  45         */
  46         tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
  47         seqnum++;
  48         tdb1_ofs_write(tdb, TDB1_SEQNUM_OFS, &seqnum);
  49 }
  50
  51 /*
  52   increment the tdb sequence number if the tdb has been opened using
  53   the TDB_SEQNUM flag
  54 */
  55 static void tdb1_increment_seqnum(struct tdb_context *tdb)
  56 {
  57         if (!(tdb->flags & TDB_SEQNUM)) {
  58                 return;
  59         }
  60
  61         if (tdb1_nest_lock(tdb, TDB1_SEQNUM_OFS, F_WRLCK,
  62                            TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
  63                 return;
  64         }
  65
  66         tdb1_increment_seqnum_nonblock(tdb);
  67
  68         tdb1_nest_unlock(tdb, TDB1_SEQNUM_OFS, F_WRLCK);
  69 }
  70
  71 static int tdb1_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
  72 {
  73         return memcmp(data.dptr, key.dptr, data.dsize);
  74 }
  75
  76 /* Returns 0 on fail.  On success, return offset of record, and fills
  77    in rec */
  78 static tdb1_off_t tdb1_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
  79                         struct tdb1_record *r)
  80 {
  81         tdb1_off_t rec_ptr;
  82
  83         /* read in the hash top */
  84         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
  85                 return 0;
  86
  87         /* keep looking until we find the right record */
  88         while (rec_ptr) {
  89                 if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
  90                         return 0;
  91
  92                 if (!TDB1_DEAD(r) && hash==r->full_hash
  93                     && key.dsize==r->key_len
  94                     && tdb1_parse_data(tdb, key, rec_ptr + sizeof(*r),
  95                                       r->key_len, tdb1_key_compare,
  96                                       NULL) == 0) {
  97                         return rec_ptr;
  98                 }
  99                 /* detect tight infinite loop */
 100                 if (rec_ptr == r->next) {
 101                         tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT,
 102                                                 TDB_LOG_ERROR,
 103                                                 "tdb1_find: loop detected.");
 104                         return 0;
 105                 }
 106                 rec_ptr = r->next;
 107         }
 108         tdb->last_error = TDB_ERR_NOEXIST;
 109         return 0;
 110 }
 111
 112 /* As tdb1_find, but if you succeed, keep the lock */
 113 tdb1_off_t tdb1_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
 114                            struct tdb1_record *rec)
 115 {
 116         uint32_t rec_ptr;
 117
 118         if (tdb1_lock(tdb, TDB1_BUCKET(hash), locktype) == -1)
 119                 return 0;
 120         if (!(rec_ptr = tdb1_find(tdb, key, hash, rec)))
 121                 tdb1_unlock(tdb, TDB1_BUCKET(hash), locktype);
 122         return rec_ptr;
 123 }
 124
 125 static TDB_DATA _tdb1_fetch(struct tdb_context *tdb, TDB_DATA key);
 126
 127 /* update an entry in place - this only works if the new data size
 128    is <= the old data size and the key exists.
 129    on failure return -1.
 130 */
 131 static int tdb1_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
 132 {
 133         struct tdb1_record rec;
 134         tdb1_off_t rec_ptr;
 135
 136         /* find entry */
 137         if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec)))
 138                 return -1;
 139
 140         /* it could be an exact duplicate of what is there - this is
 141          * surprisingly common (eg. with a ldb re-index). */
 142         if (rec.key_len == key.dsize &&
 143             rec.data_len == dbuf.dsize &&
 144             rec.full_hash == hash) {
 145                 TDB_DATA data = _tdb1_fetch(tdb, key);
 146                 if (data.dsize == dbuf.dsize &&
 147                     memcmp(data.dptr, dbuf.dptr, data.dsize) == 0) {
 148                         if (data.dptr) {
 149                                 free(data.dptr);
 150                         }
 151                         return 0;
 152                 }
 153                 if (data.dptr) {
 154                         free(data.dptr);
 155                 }
 156         }
 157
 158         /* must be long enough key, data and tailer */
 159         if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb1_off_t)) {
 160                 tdb->last_error = TDB_SUCCESS; /* Not really an error */
 161                 return -1;
 162         }
 163
 164         if (tdb->tdb1.io->tdb1_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 165                       dbuf.dptr, dbuf.dsize) == -1)
 166                 return -1;
 167
 168         if (dbuf.dsize != rec.data_len) {
 169                 /* update size */
 170                 rec.data_len = dbuf.dsize;
 171                 return tdb1_rec_write(tdb, rec_ptr, &rec);
 172         }
 173
 174         return 0;
 175 }
 176
 177 /* find an entry in the database given a key */
 178 /* If an entry doesn't exist tdb1_err will be set to
 179  * TDB_ERR_NOEXIST. If a key has no data attached
 180  * then the TDB_DATA will have zero length but
 181  * a non-zero pointer
 182  */
 183 static TDB_DATA _tdb1_fetch(struct tdb_context *tdb, TDB_DATA key)
 184 {
 185         tdb1_off_t rec_ptr;
 186         struct tdb1_record rec;
 187         TDB_DATA ret;
 188         uint32_t hash;
 189
 190         /* find which hash bucket it is in */
 191         hash = tdb_hash(tdb, key.dptr, key.dsize);
 192         if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 193                 ret.dptr = NULL;
 194                 ret.dsize = 0;
 195                 return ret;
 196         }
 197
 198         ret.dptr = tdb1_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 199                                   rec.data_len);
 200         ret.dsize = rec.data_len;
 201         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 202         return ret;
 203 }
 204
 205 enum TDB_ERROR tdb1_fetch(struct tdb_context *tdb, TDB_DATA key, TDB_DATA *data)
 206 {
 207         *data = _tdb1_fetch(tdb, key);
 208         if (data->dptr == NULL)
 209                 return tdb->last_error;
 210         return TDB_SUCCESS;
 211 }
 212
 213 enum TDB_ERROR tdb1_parse_record(struct tdb_context *tdb, TDB_DATA key,
 214                                  enum TDB_ERROR (*parser)(TDB_DATA key,
 215                                                           TDB_DATA data,
 216                                                           void *private_data),
 217                                  void *private_data)
 218 {
 219         tdb1_off_t rec_ptr;
 220         struct tdb1_record rec;
 221         enum TDB_ERROR ret;
 222         uint32_t hash;
 223
 224         /* find which hash bucket it is in */
 225         hash = tdb_hash(tdb, key.dptr, key.dsize);
 226
 227         if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 228                 /* record not found */
 229                 return TDB_ERR_NOEXIST;
 230         }
 231
 232         ret = tdb1_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
 233                              rec.data_len, parser, private_data);
 234
 235         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 236
 237         return ret;
 238 }
 239
 240 /* check if an entry in the database exists
 241
 242    note that 1 is returned if the key is found and 0 is returned if not found
 243    this doesn't match the conventions in the rest of this module, but is
 244    compatible with gdbm
 245 */
 246 static int tdb1_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 247 {
 248         struct tdb1_record rec;
 249
 250         if (tdb1_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
 251                 return 0;
 252         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 253         return 1;
 254 }
 255
 256 int tdb1_exists(struct tdb_context *tdb, TDB_DATA key)
 257 {
 258         uint32_t hash = tdb_hash(tdb, key.dptr, key.dsize);
 259         int ret;
 260
 261         assert(tdb->flags & TDB_VERSION1);
 262         ret = tdb1_exists_hash(tdb, key, hash);
 263         return ret;
 264 }
 265
 266 /* actually delete an entry in the database given the offset */
 267 int tdb1_do_delete(struct tdb_context *tdb, tdb1_off_t rec_ptr, struct tdb1_record *rec)
 268 {
 269         tdb1_off_t last_ptr, i;
 270         struct tdb1_record lastrec;
 271
 272         if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) return -1;
 273
 274         if (((tdb->tdb1.traverse_write != 0) && (!TDB1_DEAD(rec))) ||
 275             tdb1_write_lock_record(tdb, rec_ptr) == -1) {
 276                 /* Someone traversing here: mark it as dead */
 277                 rec->magic = TDB1_DEAD_MAGIC;
 278                 return tdb1_rec_write(tdb, rec_ptr, rec);
 279         }
 280         if (tdb1_write_unlock_record(tdb, rec_ptr) != 0)
 281                 return -1;
 282
 283         /* find previous record in hash chain */
 284         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(rec->full_hash), &i) == -1)
 285                 return -1;
 286         for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
 287                 if (tdb1_rec_read(tdb, i, &lastrec) == -1)
 288                         return -1;
 289
 290         /* unlink it: next ptr is at start of record. */
 291         if (last_ptr == 0)
 292                 last_ptr = TDB1_HASH_TOP(rec->full_hash);
 293         if (tdb1_ofs_write(tdb, last_ptr, &rec->next) == -1)
 294                 return -1;
 295
 296         /* recover the space */
 297         if (tdb1_free(tdb, rec_ptr, rec) == -1)
 298                 return -1;
 299         return 0;
 300 }
 301
 302 static int tdb1_count_dead(struct tdb_context *tdb, uint32_t hash)
 303 {
 304         int res = 0;
 305         tdb1_off_t rec_ptr;
 306         struct tdb1_record rec;
 307
 308         /* read in the hash top */
 309         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 310                 return 0;
 311
 312         while (rec_ptr) {
 313                 if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1)
 314                         return 0;
 315
 316                 if (rec.magic == TDB1_DEAD_MAGIC) {
 317                         res += 1;
 318                 }
 319                 rec_ptr = rec.next;
 320         }
 321         return res;
 322 }
 323
 324 /*
 325  * Purge all DEAD records from a hash chain
 326  */
 327 static int tdb1_purge_dead(struct tdb_context *tdb, uint32_t hash)
 328 {
 329         int res = -1;
 330         struct tdb1_record rec;
 331         tdb1_off_t rec_ptr;
 332
 333         if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
 334                 return -1;
 335         }
 336
 337         /* read in the hash top */
 338         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 339                 goto fail;
 340
 341         while (rec_ptr) {
 342                 tdb1_off_t next;
 343
 344                 if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1) {
 345                         goto fail;
 346                 }
 347
 348                 next = rec.next;
 349
 350                 if (rec.magic == TDB1_DEAD_MAGIC
 351                     && tdb1_do_delete(tdb, rec_ptr, &rec) == -1) {
 352                         goto fail;
 353                 }
 354                 rec_ptr = next;
 355         }
 356         res = 0;
 357  fail:
 358         tdb1_unlock(tdb, -1, F_WRLCK);
 359         return res;
 360 }
 361
 362 /* delete an entry in the database given a key */
 363 static int tdb1_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 364 {
 365         tdb1_off_t rec_ptr;
 366         struct tdb1_record rec;
 367         int ret;
 368
 369         if (tdb->tdb1.max_dead_records != 0) {
 370
 371                 /*
 372                  * Allow for some dead records per hash chain, mainly for
 373                  * tdb's with a very high create/delete rate like locking.tdb.
 374                  */
 375
 376                 if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 377                         return -1;
 378
 379                 if (tdb1_count_dead(tdb, hash) >= tdb->tdb1.max_dead_records) {
 380                         /*
 381                          * Don't let the per-chain freelist grow too large,
 382                          * delete all existing dead records
 383                          */
 384                         tdb1_purge_dead(tdb, hash);
 385                 }
 386
 387                 if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec))) {
 388                         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 389                         return -1;
 390                 }
 391
 392                 /*
 393                  * Just mark the record as dead.
 394                  */
 395                 rec.magic = TDB1_DEAD_MAGIC;
 396                 ret = tdb1_rec_write(tdb, rec_ptr, &rec);
 397         }
 398         else {
 399                 if (!(rec_ptr = tdb1_find_lock_hash(tdb, key, hash, F_WRLCK,
 400                                                    &rec)))
 401                         return -1;
 402
 403                 ret = tdb1_do_delete(tdb, rec_ptr, &rec);
 404         }
 405
 406         if (ret == 0) {
 407                 tdb1_increment_seqnum(tdb);
 408         }
 409
 410         if (tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_WRLCK) != 0)
 411                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 412                            "tdb1_delete: WARNING tdb1_unlock failed!");
 413         return ret;
 414 }
 415
 416 int tdb1_delete(struct tdb_context *tdb, TDB_DATA key)
 417 {
 418         uint32_t hash = tdb_hash(tdb, key.dptr, key.dsize);
 419         int ret;
 420
 421         assert(tdb->flags & TDB_VERSION1);
 422         ret = tdb1_delete_hash(tdb, key, hash);
 423         return ret;
 424 }
 425
 426 /*
 427  * See if we have a dead record around with enough space
 428  */
 429 static tdb1_off_t tdb1_find_dead(struct tdb_context *tdb, uint32_t hash,
 430                                struct tdb1_record *r, tdb1_len_t length)
 431 {
 432         tdb1_off_t rec_ptr;
 433
 434         /* read in the hash top */
 435         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 436                 return 0;
 437
 438         /* keep looking until we find the right record */
 439         while (rec_ptr) {
 440                 if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
 441                         return 0;
 442
 443                 if (TDB1_DEAD(r) && r->rec_len >= length) {
 444                         /*
 445                          * First fit for simple coding, TODO: change to best
 446                          * fit
 447                          */
 448                         return rec_ptr;
 449                 }
 450                 rec_ptr = r->next;
 451         }
 452         return 0;
 453 }
 454
 455 static int _tdb1_store(struct tdb_context *tdb, TDB_DATA key,
 456                        TDB_DATA dbuf, int flag, uint32_t hash)
 457 {
 458         struct tdb1_record rec;
 459         tdb1_off_t rec_ptr;
 460         char *p = NULL;
 461         int ret = -1;
 462
 463         /* check for it existing, on insert. */
 464         if (flag == TDB_INSERT) {
 465                 if (tdb1_exists_hash(tdb, key, hash)) {
 466                         tdb->last_error = TDB_ERR_EXISTS;
 467                         goto fail;
 468                 }
 469         } else {
 470                 /* first try in-place update, on modify or replace. */
 471                 if (tdb1_update_hash(tdb, key, hash, dbuf) == 0) {
 472                         goto done;
 473                 }
 474                 if (tdb->last_error == TDB_ERR_NOEXIST &&
 475                     flag == TDB_MODIFY) {
 476                         /* if the record doesn't exist and we are in TDB1_MODIFY mode then
 477                          we should fail the store */
 478                         goto fail;
 479                 }
 480         }
 481         /* reset the error code potentially set by the tdb1_update() */
 482         tdb->last_error = TDB_SUCCESS;
 483
 484         /* delete any existing record - if it doesn't exist we don't
 485            care.  Doing this first reduces fragmentation, and avoids
 486            coalescing with `allocated' block before it's updated. */
 487         if (flag != TDB_INSERT)
 488                 tdb1_delete_hash(tdb, key, hash);
 489
 490         /* Copy key+value *before* allocating free space in case malloc
 491            fails and we are left with a dead spot in the tdb. */
 492
 493         if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
 494                 tdb->last_error = TDB_ERR_OOM;
 495                 goto fail;
 496         }
 497
 498         memcpy(p, key.dptr, key.dsize);
 499         if (dbuf.dsize)
 500                 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
 501
 502         if (tdb->tdb1.max_dead_records != 0) {
 503                 /*
 504                  * Allow for some dead records per hash chain, look if we can
 505                  * find one that can hold the new record. We need enough space
 506                  * for key, data and tailer. If we find one, we don't have to
 507                  * consult the central freelist.
 508                  */
 509                 rec_ptr = tdb1_find_dead(
 510                         tdb, hash, &rec,
 511                         key.dsize + dbuf.dsize + sizeof(tdb1_off_t));
 512
 513                 if (rec_ptr != 0) {
 514                         rec.key_len = key.dsize;
 515                         rec.data_len = dbuf.dsize;
 516                         rec.full_hash = hash;
 517                         rec.magic = TDB1_MAGIC;
 518                         if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
 519                             || tdb->tdb1.io->tdb1_write(
 520                                     tdb, rec_ptr + sizeof(rec),
 521                                     p, key.dsize + dbuf.dsize) == -1) {
 522                                 goto fail;
 523                         }
 524                         goto done;
 525                 }
 526         }
 527
 528         /*
 529          * We have to allocate some space from the freelist, so this means we
 530          * have to lock it. Use the chance to purge all the DEAD records from
 531          * the hash chain under the freelist lock.
 532          */
 533
 534         if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
 535                 goto fail;
 536         }
 537
 538         if ((tdb->tdb1.max_dead_records != 0)
 539             && (tdb1_purge_dead(tdb, hash) == -1)) {
 540                 tdb1_unlock(tdb, -1, F_WRLCK);
 541                 goto fail;
 542         }
 543
 544         /* we have to allocate some space */
 545         rec_ptr = tdb1_allocate(tdb, key.dsize + dbuf.dsize, &rec);
 546
 547         tdb1_unlock(tdb, -1, F_WRLCK);
 548
 549         if (rec_ptr == 0) {
 550                 goto fail;
 551         }
 552
 553         /* Read hash top into next ptr */
 554         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec.next) == -1)
 555                 goto fail;
 556
 557         rec.key_len = key.dsize;
 558         rec.data_len = dbuf.dsize;
 559         rec.full_hash = hash;
 560         rec.magic = TDB1_MAGIC;
 561
 562         /* write out and point the top of the hash chain at it */
 563         if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
 564             || tdb->tdb1.io->tdb1_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
 565             || tdb1_ofs_write(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1) {
 566                 /* Need to tdb1_unallocate() here */
 567                 goto fail;
 568         }
 569
 570  done:
 571         ret = 0;
 572  fail:
 573         if (ret == 0) {
 574                 tdb1_increment_seqnum(tdb);
 575         }
 576
 577         SAFE_FREE(p);
 578         return ret;
 579 }
 580
 581 /* store an element in the database, replacing any existing element
 582    with the same key
 583
 584    return 0 on success, -1 on failure
 585 */
 586 int tdb1_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 587 {
 588         uint32_t hash;
 589         int ret;
 590
 591         assert(tdb->flags & TDB_VERSION1);
 592
 593         if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
 594                 tdb->last_error = TDB_ERR_RDONLY;
 595                 return -1;
 596         }
 597
 598         /* find which hash bucket it is in */
 599         hash = tdb_hash(tdb, key.dptr, key.dsize);
 600         if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 601                 return -1;
 602
 603         ret = _tdb1_store(tdb, key, dbuf, flag, hash);
 604         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 605         return ret;
 606 }
 607
 608 /* Append to an entry. Create if not exist. */
 609 int tdb1_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
 610 {
 611         uint32_t hash;
 612         TDB_DATA dbuf;
 613         int ret = -1;
 614
 615         assert(tdb->flags & TDB_VERSION1);
 616
 617         /* find which hash bucket it is in */
 618         hash = tdb_hash(tdb, key.dptr, key.dsize);
 619         if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 620                 return -1;
 621
 622         dbuf = _tdb1_fetch(tdb, key);
 623
 624         if (dbuf.dptr == NULL) {
 625                 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
 626         } else {
 627                 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
 628                 unsigned char *new_dptr;
 629
 630                 /* realloc '0' is special: don't do that. */
 631                 if (new_len == 0)
 632                         new_len = 1;
 633                 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
 634                 if (new_dptr == NULL) {
 635                         free(dbuf.dptr);
 636                 }
 637                 dbuf.dptr = new_dptr;
 638         }
 639
 640         if (dbuf.dptr == NULL) {
 641                 tdb->last_error = TDB_ERR_OOM;
 642                 goto failed;
 643         }
 644
 645         memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
 646         dbuf.dsize += new_dbuf.dsize;
 647
 648         ret = _tdb1_store(tdb, key, dbuf, 0, hash);
 649
 650 failed:
 651         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 652         SAFE_FREE(dbuf.dptr);
 653         return ret;
 654 }
 655
 656
 657 /*
 658   get the tdb sequence number. Only makes sense if the writers opened
 659   with TDB1_SEQNUM set. Note that this sequence number will wrap quite
 660   quickly, so it should only be used for a 'has something changed'
 661   test, not for code that relies on the count of the number of changes
 662   made. If you want a counter then use a tdb record.
 663
 664   The aim of this sequence number is to allow for a very lightweight
 665   test of a possible tdb change.
 666 */
 667 int tdb1_get_seqnum(struct tdb_context *tdb)
 668 {
 669         tdb1_off_t seqnum=0;
 670
 671         tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
 672         return seqnum;
 673 }
 674
 675
 676 /*
 677   add a region of the file to the freelist. Length is the size of the region in bytes,
 678   which includes the free list header that needs to be added
 679  */
 680 static int tdb1_free_region(struct tdb_context *tdb, tdb1_off_t offset, ssize_t length)
 681 {
 682         struct tdb1_record rec;
 683         if (length <= sizeof(rec)) {
 684                 /* the region is not worth adding */
 685                 return 0;
 686         }
 687         if (length + offset > tdb->file->map_size) {
 688                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
 689                                         "tdb1_free_region: adding region beyond"
 690                                         " end of file");
 691                 return -1;
 692         }
 693         memset(&rec,'\0',sizeof(rec));
 694         rec.rec_len = length - sizeof(rec);
 695         if (tdb1_free(tdb, offset, &rec) == -1) {
 696                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 697                            "tdb1_free_region: failed to add free record");
 698                 return -1;
 699         }
 700         return 0;
 701 }
 702
 703 /*
 704   wipe the entire database, deleting all records. This can be done
 705   very fast by using a allrecord lock. The entire data portion of the
 706   file becomes a single entry in the freelist.
 707
 708   This code carefully steps around the recovery area, leaving it alone
 709  */
 710 int tdb1_wipe_all(struct tdb_context *tdb)
 711 {
 712         int i;
 713         tdb1_off_t offset = 0;
 714         ssize_t data_len;
 715         tdb1_off_t recovery_head;
 716         tdb1_len_t recovery_size = 0;
 717
 718         if (tdb_lockall(tdb) != TDB_SUCCESS) {
 719                 return -1;
 720         }
 721
 722
 723         /* see if the tdb has a recovery area, and remember its size
 724            if so. We don't want to lose this as otherwise each
 725            tdb1_wipe_all() in a transaction will increase the size of
 726            the tdb by the size of the recovery area */
 727         if (tdb1_ofs_read(tdb, TDB1_RECOVERY_HEAD, &recovery_head) == -1) {
 728                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 729                            "tdb1_wipe_all: failed to read recovery head");
 730                 goto failed;
 731         }
 732
 733         if (recovery_head != 0) {
 734                 struct tdb1_record rec;
 735                 if (tdb->tdb1.io->tdb1_read(tdb, recovery_head, &rec, sizeof(rec), TDB1_DOCONV()) == -1) {
 736                         tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 737                                    "tdb1_wipe_all: failed to read recovery record");
 738                         return -1;
 739                 }
 740                 recovery_size = rec.rec_len + sizeof(rec);
 741         }
 742
 743         /* wipe the hashes */
 744         for (i=0;i<tdb->tdb1.header.hash_size;i++) {
 745                 if (tdb1_ofs_write(tdb, TDB1_HASH_TOP(i), &offset) == -1) {
 746                         tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 747                                    "tdb1_wipe_all: failed to write hash %d", i);
 748                         goto failed;
 749                 }
 750         }
 751
 752         /* wipe the freelist */
 753         if (tdb1_ofs_write(tdb, TDB1_FREELIST_TOP, &offset) == -1) {
 754                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 755                            "tdb1_wipe_all: failed to write freelist");
 756                 goto failed;
 757         }
 758
 759         /* add all the rest of the file to the freelist, possibly leaving a gap
 760            for the recovery area */
 761         if (recovery_size == 0) {
 762                 /* the simple case - the whole file can be used as a freelist */
 763                 data_len = (tdb->file->map_size - TDB1_DATA_START(tdb->tdb1.header.hash_size));
 764                 if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
 765                         goto failed;
 766                 }
 767         } else {
 768                 /* we need to add two freelist entries - one on either
 769                    side of the recovery area
 770
 771                    Note that we cannot shift the recovery area during
 772                    this operation. Only the transaction.c code may
 773                    move the recovery area or we risk subtle data
 774                    corruption
 775                 */
 776                 data_len = (recovery_head - TDB1_DATA_START(tdb->tdb1.header.hash_size));
 777                 if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
 778                         goto failed;
 779                 }
 780                 /* and the 2nd free list entry after the recovery area - if any */
 781                 data_len = tdb->file->map_size - (recovery_head+recovery_size);
 782                 if (tdb1_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
 783                         goto failed;
 784                 }
 785         }
 786
 787         tdb_unlockall(tdb);
 788         return 0;
 789
 790 failed:
 791         tdb_unlockall(tdb);
 792         return -1;
 793 }
 794
 795 /* Even on files, we can get partial writes due to signals. */
 796 bool tdb1_write_all(int fd, const void *buf, size_t count)
 797 {
 798         while (count) {
 799                 ssize_t ret;
 800                 ret = write(fd, buf, count);
 801                 if (ret < 0)
 802                         return false;
 803                 buf = (const char *)buf + ret;
 804                 count -= ret;
 805         }
 806         return true;
 807 }