git.ozlabs.org Git - ccan/blob - ccan/tdb2/tdb1_tdb.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library
   5
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 3 of the License, or (at your option) any later version.
  18
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, see <http://www.gnu.org/licenses/>.
  26 */
  27
  28 #include "tdb1_private.h"
  29 #include <assert.h>
  30
  31 /*
  32   non-blocking increment of the tdb sequence number if the tdb has been opened using
  33   the TDB_SEQNUM flag
  34 */
  35 void tdb1_increment_seqnum_nonblock(struct tdb_context *tdb)
  36 {
  37         tdb1_off_t seqnum=0;
  38
  39         if (!(tdb->flags & TDB_SEQNUM)) {
  40                 return;
  41         }
  42
  43         /* we ignore errors from this, as we have no sane way of
  44            dealing with them.
  45         */
  46         tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
  47         seqnum++;
  48         tdb1_ofs_write(tdb, TDB1_SEQNUM_OFS, &seqnum);
  49 }
  50
  51 /*
  52   increment the tdb sequence number if the tdb has been opened using
  53   the TDB_SEQNUM flag
  54 */
  55 static void tdb1_increment_seqnum(struct tdb_context *tdb)
  56 {
  57         if (!(tdb->flags & TDB_SEQNUM)) {
  58                 return;
  59         }
  60
  61         if (tdb1_nest_lock(tdb, TDB1_SEQNUM_OFS, F_WRLCK,
  62                            TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
  63                 return;
  64         }
  65
  66         tdb1_increment_seqnum_nonblock(tdb);
  67
  68         tdb1_nest_unlock(tdb, TDB1_SEQNUM_OFS, F_WRLCK);
  69 }
  70
  71 static enum TDB_ERROR tdb1_key_compare(TDB_DATA key, TDB_DATA data,
  72                                        void *matches_)
  73 {
  74         bool *matches = matches_;
  75         *matches = (memcmp(data.dptr, key.dptr, data.dsize) == 0);
  76         return TDB_SUCCESS;
  77 }
  78
  79 /* Returns 0 on fail; last_error will be TDB_ERR_NOEXIST if it simply
  80  * wasn't there, otherwise a real error.
  81  * On success, return offset of record, and fills in rec */
  82 static tdb1_off_t tdb1_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
  83                         struct tdb1_record *r)
  84 {
  85         tdb1_off_t rec_ptr;
  86
  87         /* read in the hash top */
  88         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
  89                 return 0;
  90
  91         /* keep looking until we find the right record */
  92         while (rec_ptr) {
  93                 if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
  94                         return 0;
  95
  96                 tdb->stats.compares++;
  97                 if (TDB1_DEAD(r)) {
  98                         tdb->stats.compare_wrong_bucket++;
  99                 } else if (key.dsize != r->key_len) {
 100                         tdb->stats.compare_wrong_keylen++;
 101                 } else if (hash != r->full_hash) {
 102                         tdb->stats.compare_wrong_rechash++;
 103                 } else {
 104                         enum TDB_ERROR ecode;
 105                         bool matches;
 106                         ecode = tdb1_parse_data(tdb, key, rec_ptr + sizeof(*r),
 107                                                 r->key_len, tdb1_key_compare,
 108                                                 &matches);
 109
 110                         if (ecode != TDB_SUCCESS) {
 111                                 tdb->last_error = ecode;
 112                                 return 0;
 113                         }
 114
 115                         if (!matches) {
 116                                 tdb->stats.compare_wrong_keycmp++;
 117                         } else {
 118                                 return rec_ptr;
 119                         }
 120                 }
 121                 /* detect tight infinite loop */
 122                 if (rec_ptr == r->next) {
 123                         tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT,
 124                                                 TDB_LOG_ERROR,
 125                                                 "tdb1_find: loop detected.");
 126                         return 0;
 127                 }
 128                 rec_ptr = r->next;
 129         }
 130         tdb->last_error = TDB_ERR_NOEXIST;
 131         return 0;
 132 }
 133
 134 /* As tdb1_find, but if you succeed, keep the lock */
 135 tdb1_off_t tdb1_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
 136                            struct tdb1_record *rec)
 137 {
 138         uint32_t rec_ptr;
 139
 140         if (tdb1_lock(tdb, TDB1_BUCKET(hash), locktype) == -1)
 141                 return 0;
 142         if (!(rec_ptr = tdb1_find(tdb, key, hash, rec)))
 143                 tdb1_unlock(tdb, TDB1_BUCKET(hash), locktype);
 144         return rec_ptr;
 145 }
 146
 147 static TDB_DATA _tdb1_fetch(struct tdb_context *tdb, TDB_DATA key);
 148
 149 /* update an entry in place - this only works if the new data size
 150    is <= the old data size and the key exists.
 151    on failure return -1.
 152 */
 153 static int tdb1_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
 154 {
 155         struct tdb1_record rec;
 156         tdb1_off_t rec_ptr;
 157
 158         /* find entry */
 159         if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec)))
 160                 return -1;
 161
 162         /* it could be an exact duplicate of what is there - this is
 163          * surprisingly common (eg. with a ldb re-index). */
 164         if (rec.key_len == key.dsize &&
 165             rec.data_len == dbuf.dsize &&
 166             rec.full_hash == hash) {
 167                 TDB_DATA data = _tdb1_fetch(tdb, key);
 168                 if (data.dsize == dbuf.dsize &&
 169                     memcmp(data.dptr, dbuf.dptr, data.dsize) == 0) {
 170                         if (data.dptr) {
 171                                 free(data.dptr);
 172                         }
 173                         return 0;
 174                 }
 175                 if (data.dptr) {
 176                         free(data.dptr);
 177                 }
 178         }
 179
 180         /* must be long enough key, data and tailer */
 181         if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb1_off_t)) {
 182                 tdb->last_error = TDB_SUCCESS; /* Not really an error */
 183                 return -1;
 184         }
 185
 186         if (tdb->tdb1.io->tdb1_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 187                       dbuf.dptr, dbuf.dsize) == -1)
 188                 return -1;
 189
 190         if (dbuf.dsize != rec.data_len) {
 191                 /* update size */
 192                 rec.data_len = dbuf.dsize;
 193                 return tdb1_rec_write(tdb, rec_ptr, &rec);
 194         }
 195
 196         return 0;
 197 }
 198
 199 /* find an entry in the database given a key */
 200 /* If an entry doesn't exist tdb1_err will be set to
 201  * TDB_ERR_NOEXIST. If a key has no data attached
 202  * then the TDB_DATA will have zero length but
 203  * a non-zero pointer
 204  */
 205 static TDB_DATA _tdb1_fetch(struct tdb_context *tdb, TDB_DATA key)
 206 {
 207         tdb1_off_t rec_ptr;
 208         struct tdb1_record rec;
 209         TDB_DATA ret;
 210         uint32_t hash;
 211
 212         /* find which hash bucket it is in */
 213         hash = tdb_hash(tdb, key.dptr, key.dsize);
 214         if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 215                 ret.dptr = NULL;
 216                 ret.dsize = 0;
 217                 return ret;
 218         }
 219
 220         ret.dptr = tdb1_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 221                                   rec.data_len);
 222         ret.dsize = rec.data_len;
 223         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 224         return ret;
 225 }
 226
 227 enum TDB_ERROR tdb1_fetch(struct tdb_context *tdb, TDB_DATA key, TDB_DATA *data)
 228 {
 229         *data = _tdb1_fetch(tdb, key);
 230         if (data->dptr == NULL)
 231                 return tdb->last_error;
 232         return TDB_SUCCESS;
 233 }
 234
 235 enum TDB_ERROR tdb1_parse_record(struct tdb_context *tdb, TDB_DATA key,
 236                                  enum TDB_ERROR (*parser)(TDB_DATA key,
 237                                                           TDB_DATA data,
 238                                                           void *private_data),
 239                                  void *private_data)
 240 {
 241         tdb1_off_t rec_ptr;
 242         struct tdb1_record rec;
 243         enum TDB_ERROR ret;
 244         uint32_t hash;
 245
 246         /* find which hash bucket it is in */
 247         hash = tdb_hash(tdb, key.dptr, key.dsize);
 248
 249         if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 250                 return tdb->last_error;
 251         }
 252
 253         ret = tdb1_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
 254                              rec.data_len, parser, private_data);
 255
 256         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 257
 258         return ret;
 259 }
 260
 261 /* check if an entry in the database exists
 262
 263    note that 1 is returned if the key is found and 0 is returned if not found
 264    this doesn't match the conventions in the rest of this module, but is
 265    compatible with gdbm
 266 */
 267 static int tdb1_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 268 {
 269         struct tdb1_record rec;
 270
 271         if (tdb1_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
 272                 return 0;
 273         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 274         return 1;
 275 }
 276
 277 int tdb1_exists(struct tdb_context *tdb, TDB_DATA key)
 278 {
 279         uint32_t hash = tdb_hash(tdb, key.dptr, key.dsize);
 280         int ret;
 281
 282         assert(tdb->flags & TDB_VERSION1);
 283         ret = tdb1_exists_hash(tdb, key, hash);
 284         return ret;
 285 }
 286
 287 /* actually delete an entry in the database given the offset */
 288 int tdb1_do_delete(struct tdb_context *tdb, tdb1_off_t rec_ptr, struct tdb1_record *rec)
 289 {
 290         tdb1_off_t last_ptr, i;
 291         struct tdb1_record lastrec;
 292
 293         if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) return -1;
 294
 295         if (((tdb->tdb1.traverse_write != 0) && (!TDB1_DEAD(rec))) ||
 296             tdb1_write_lock_record(tdb, rec_ptr) == -1) {
 297                 /* Someone traversing here: mark it as dead */
 298                 rec->magic = TDB1_DEAD_MAGIC;
 299                 return tdb1_rec_write(tdb, rec_ptr, rec);
 300         }
 301         if (tdb1_write_unlock_record(tdb, rec_ptr) != 0)
 302                 return -1;
 303
 304         /* find previous record in hash chain */
 305         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(rec->full_hash), &i) == -1)
 306                 return -1;
 307         for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
 308                 if (tdb1_rec_read(tdb, i, &lastrec) == -1)
 309                         return -1;
 310
 311         /* unlink it: next ptr is at start of record. */
 312         if (last_ptr == 0)
 313                 last_ptr = TDB1_HASH_TOP(rec->full_hash);
 314         if (tdb1_ofs_write(tdb, last_ptr, &rec->next) == -1)
 315                 return -1;
 316
 317         /* recover the space */
 318         if (tdb1_free(tdb, rec_ptr, rec) == -1)
 319                 return -1;
 320         return 0;
 321 }
 322
 323 static int tdb1_count_dead(struct tdb_context *tdb, uint32_t hash)
 324 {
 325         int res = 0;
 326         tdb1_off_t rec_ptr;
 327         struct tdb1_record rec;
 328
 329         /* read in the hash top */
 330         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 331                 return 0;
 332
 333         while (rec_ptr) {
 334                 if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1)
 335                         return 0;
 336
 337                 if (rec.magic == TDB1_DEAD_MAGIC) {
 338                         res += 1;
 339                 }
 340                 rec_ptr = rec.next;
 341         }
 342         return res;
 343 }
 344
 345 /*
 346  * Purge all DEAD records from a hash chain
 347  */
 348 static int tdb1_purge_dead(struct tdb_context *tdb, uint32_t hash)
 349 {
 350         int res = -1;
 351         struct tdb1_record rec;
 352         tdb1_off_t rec_ptr;
 353
 354         if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
 355                 return -1;
 356         }
 357
 358         /* read in the hash top */
 359         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 360                 goto fail;
 361
 362         while (rec_ptr) {
 363                 tdb1_off_t next;
 364
 365                 if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1) {
 366                         goto fail;
 367                 }
 368
 369                 next = rec.next;
 370
 371                 if (rec.magic == TDB1_DEAD_MAGIC
 372                     && tdb1_do_delete(tdb, rec_ptr, &rec) == -1) {
 373                         goto fail;
 374                 }
 375                 rec_ptr = next;
 376         }
 377         res = 0;
 378  fail:
 379         tdb1_unlock(tdb, -1, F_WRLCK);
 380         return res;
 381 }
 382
 383 /* delete an entry in the database given a key */
 384 static int tdb1_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 385 {
 386         tdb1_off_t rec_ptr;
 387         struct tdb1_record rec;
 388         int ret;
 389
 390         if (tdb->tdb1.max_dead_records != 0) {
 391
 392                 /*
 393                  * Allow for some dead records per hash chain, mainly for
 394                  * tdb's with a very high create/delete rate like locking.tdb.
 395                  */
 396
 397                 if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 398                         return -1;
 399
 400                 if (tdb1_count_dead(tdb, hash) >= tdb->tdb1.max_dead_records) {
 401                         /*
 402                          * Don't let the per-chain freelist grow too large,
 403                          * delete all existing dead records
 404                          */
 405                         tdb1_purge_dead(tdb, hash);
 406                 }
 407
 408                 if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec))) {
 409                         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 410                         return -1;
 411                 }
 412
 413                 /*
 414                  * Just mark the record as dead.
 415                  */
 416                 rec.magic = TDB1_DEAD_MAGIC;
 417                 ret = tdb1_rec_write(tdb, rec_ptr, &rec);
 418         }
 419         else {
 420                 if (!(rec_ptr = tdb1_find_lock_hash(tdb, key, hash, F_WRLCK,
 421                                                    &rec)))
 422                         return -1;
 423
 424                 ret = tdb1_do_delete(tdb, rec_ptr, &rec);
 425         }
 426
 427         if (ret == 0) {
 428                 tdb1_increment_seqnum(tdb);
 429         }
 430
 431         if (tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_WRLCK) != 0)
 432                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 433                            "tdb1_delete: WARNING tdb1_unlock failed!");
 434         return ret;
 435 }
 436
 437 int tdb1_delete(struct tdb_context *tdb, TDB_DATA key)
 438 {
 439         uint32_t hash = tdb_hash(tdb, key.dptr, key.dsize);
 440         int ret;
 441
 442         assert(tdb->flags & TDB_VERSION1);
 443         ret = tdb1_delete_hash(tdb, key, hash);
 444         return ret;
 445 }
 446
 447 /*
 448  * See if we have a dead record around with enough space
 449  */
 450 static tdb1_off_t tdb1_find_dead(struct tdb_context *tdb, uint32_t hash,
 451                                struct tdb1_record *r, tdb1_len_t length)
 452 {
 453         tdb1_off_t rec_ptr;
 454
 455         /* read in the hash top */
 456         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 457                 return 0;
 458
 459         /* keep looking until we find the right record */
 460         while (rec_ptr) {
 461                 if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
 462                         return 0;
 463
 464                 if (TDB1_DEAD(r) && r->rec_len >= length) {
 465                         /*
 466                          * First fit for simple coding, TODO: change to best
 467                          * fit
 468                          */
 469                         return rec_ptr;
 470                 }
 471                 rec_ptr = r->next;
 472         }
 473         return 0;
 474 }
 475
 476 static int _tdb1_store(struct tdb_context *tdb, TDB_DATA key,
 477                        TDB_DATA dbuf, int flag, uint32_t hash)
 478 {
 479         struct tdb1_record rec;
 480         tdb1_off_t rec_ptr;
 481         int ret = -1;
 482
 483         /* check for it existing, on insert. */
 484         if (flag == TDB_INSERT) {
 485                 if (tdb1_exists_hash(tdb, key, hash)) {
 486                         tdb->last_error = TDB_ERR_EXISTS;
 487                         goto fail;
 488                 }
 489                 if (tdb->last_error != TDB_ERR_NOEXIST) {
 490                         goto fail;
 491                 }
 492         } else {
 493                 /* first try in-place update, on modify or replace. */
 494                 if (tdb1_update_hash(tdb, key, hash, dbuf) == 0) {
 495                         goto done;
 496                 }
 497                 if (tdb->last_error != TDB_SUCCESS) {
 498                         if (tdb->last_error != TDB_ERR_NOEXIST) {
 499                                 goto fail;
 500                         }
 501                         if (flag == TDB_MODIFY) {
 502                                 /* if the record doesn't exist and we are in TDB1_MODIFY mode then
 503                                    we should fail the store */
 504                                 goto fail;
 505                         }
 506                 }
 507         }
 508         /* reset the error code potentially set by the tdb1_update() */
 509         tdb->last_error = TDB_SUCCESS;
 510
 511         /* delete any existing record - if it doesn't exist we don't
 512            care.  Doing this first reduces fragmentation, and avoids
 513            coalescing with `allocated' block before it's updated. */
 514         if (flag != TDB_INSERT)
 515                 tdb1_delete_hash(tdb, key, hash);
 516
 517         if (tdb->tdb1.max_dead_records != 0) {
 518                 /*
 519                  * Allow for some dead records per hash chain, look if we can
 520                  * find one that can hold the new record. We need enough space
 521                  * for key, data and tailer. If we find one, we don't have to
 522                  * consult the central freelist.
 523                  */
 524                 rec_ptr = tdb1_find_dead(
 525                         tdb, hash, &rec,
 526                         key.dsize + dbuf.dsize + sizeof(tdb1_off_t));
 527
 528                 if (rec_ptr != 0) {
 529                         rec.key_len = key.dsize;
 530                         rec.data_len = dbuf.dsize;
 531                         rec.full_hash = hash;
 532                         rec.magic = TDB1_MAGIC;
 533                         if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
 534                             || tdb->tdb1.io->tdb1_write(
 535                                     tdb, rec_ptr + sizeof(rec),
 536                                     key.dptr, key.dsize) == -1
 537                             || tdb->tdb1.io->tdb1_write(
 538                                     tdb, rec_ptr + sizeof(rec) + key.dsize,
 539                                     dbuf.dptr, dbuf.dsize) == -1) {
 540                                 goto fail;
 541                         }
 542                         goto done;
 543                 }
 544         }
 545
 546         /*
 547          * We have to allocate some space from the freelist, so this means we
 548          * have to lock it. Use the chance to purge all the DEAD records from
 549          * the hash chain under the freelist lock.
 550          */
 551
 552         if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
 553                 goto fail;
 554         }
 555
 556         if ((tdb->tdb1.max_dead_records != 0)
 557             && (tdb1_purge_dead(tdb, hash) == -1)) {
 558                 tdb1_unlock(tdb, -1, F_WRLCK);
 559                 goto fail;
 560         }
 561
 562         /* we have to allocate some space */
 563         rec_ptr = tdb1_allocate(tdb, key.dsize + dbuf.dsize, &rec);
 564
 565         tdb1_unlock(tdb, -1, F_WRLCK);
 566
 567         if (rec_ptr == 0) {
 568                 goto fail;
 569         }
 570
 571         /* Read hash top into next ptr */
 572         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec.next) == -1)
 573                 goto fail;
 574
 575         rec.key_len = key.dsize;
 576         rec.data_len = dbuf.dsize;
 577         rec.full_hash = hash;
 578         rec.magic = TDB1_MAGIC;
 579
 580         /* write out and point the top of the hash chain at it */
 581         if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
 582             || tdb->tdb1.io->tdb1_write(tdb, rec_ptr + sizeof(rec),
 583                                         key.dptr, key.dsize) == -1
 584             || tdb->tdb1.io->tdb1_write(tdb, rec_ptr + sizeof(rec) + key.dsize,
 585                                         dbuf.dptr, dbuf.dsize) == -1
 586             || tdb1_ofs_write(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1) {
 587                 /* Need to tdb1_unallocate() here */
 588                 goto fail;
 589         }
 590
 591  done:
 592         ret = 0;
 593  fail:
 594         if (ret == 0) {
 595                 tdb1_increment_seqnum(tdb);
 596         }
 597         return ret;
 598 }
 599
 600 /* store an element in the database, replacing any existing element
 601    with the same key
 602
 603    return 0 on success, -1 on failure
 604 */
 605 int tdb1_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 606 {
 607         uint32_t hash;
 608         int ret;
 609
 610         assert(tdb->flags & TDB_VERSION1);
 611
 612         if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
 613                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_RDONLY,
 614                                              TDB_LOG_USE_ERROR,
 615                                              "tdb_store: read-only tdb");
 616                 return -1;
 617         }
 618
 619         /* find which hash bucket it is in */
 620         hash = tdb_hash(tdb, key.dptr, key.dsize);
 621         if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 622                 return -1;
 623
 624         ret = _tdb1_store(tdb, key, dbuf, flag, hash);
 625         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 626         return ret;
 627 }
 628
 629 /* Append to an entry. Create if not exist. */
 630 int tdb1_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
 631 {
 632         uint32_t hash;
 633         TDB_DATA dbuf;
 634         int ret = -1;
 635
 636         assert(tdb->flags & TDB_VERSION1);
 637
 638         /* find which hash bucket it is in */
 639         hash = tdb_hash(tdb, key.dptr, key.dsize);
 640         if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 641                 return -1;
 642
 643         dbuf = _tdb1_fetch(tdb, key);
 644
 645         if (dbuf.dptr == NULL) {
 646                 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
 647         } else {
 648                 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
 649                 unsigned char *new_dptr;
 650
 651                 /* realloc '0' is special: don't do that. */
 652                 if (new_len == 0)
 653                         new_len = 1;
 654                 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
 655                 if (new_dptr == NULL) {
 656                         free(dbuf.dptr);
 657                 }
 658                 dbuf.dptr = new_dptr;
 659         }
 660
 661         if (dbuf.dptr == NULL) {
 662                 tdb->last_error = TDB_ERR_OOM;
 663                 goto failed;
 664         }
 665
 666         memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
 667         dbuf.dsize += new_dbuf.dsize;
 668
 669         ret = _tdb1_store(tdb, key, dbuf, 0, hash);
 670
 671 failed:
 672         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 673         SAFE_FREE(dbuf.dptr);
 674         return ret;
 675 }
 676
 677
 678 /*
 679   get the tdb sequence number. Only makes sense if the writers opened
 680   with TDB1_SEQNUM set. Note that this sequence number will wrap quite
 681   quickly, so it should only be used for a 'has something changed'
 682   test, not for code that relies on the count of the number of changes
 683   made. If you want a counter then use a tdb record.
 684
 685   The aim of this sequence number is to allow for a very lightweight
 686   test of a possible tdb change.
 687 */
 688 int tdb1_get_seqnum(struct tdb_context *tdb)
 689 {
 690         tdb1_off_t seqnum=0;
 691
 692         tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
 693         return seqnum;
 694 }
 695
 696
 697 /*
 698   add a region of the file to the freelist. Length is the size of the region in bytes,
 699   which includes the free list header that needs to be added
 700  */
 701 static int tdb1_free_region(struct tdb_context *tdb, tdb1_off_t offset, ssize_t length)
 702 {
 703         struct tdb1_record rec;
 704         if (length <= sizeof(rec)) {
 705                 /* the region is not worth adding */
 706                 return 0;
 707         }
 708         if (length + offset > tdb->file->map_size) {
 709                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
 710                                         "tdb1_free_region: adding region beyond"
 711                                         " end of file");
 712                 return -1;
 713         }
 714         memset(&rec,'\0',sizeof(rec));
 715         rec.rec_len = length - sizeof(rec);
 716         if (tdb1_free(tdb, offset, &rec) == -1) {
 717                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 718                            "tdb1_free_region: failed to add free record");
 719                 return -1;
 720         }
 721         return 0;
 722 }
 723
 724 /*
 725   wipe the entire database, deleting all records. This can be done
 726   very fast by using a allrecord lock. The entire data portion of the
 727   file becomes a single entry in the freelist.
 728
 729   This code carefully steps around the recovery area, leaving it alone
 730  */
 731 int tdb1_wipe_all(struct tdb_context *tdb)
 732 {
 733         int i;
 734         tdb1_off_t offset = 0;
 735         ssize_t data_len;
 736         tdb1_off_t recovery_head;
 737         tdb1_len_t recovery_size = 0;
 738
 739         if (tdb_lockall(tdb) != TDB_SUCCESS) {
 740                 return -1;
 741         }
 742
 743
 744         /* see if the tdb has a recovery area, and remember its size
 745            if so. We don't want to lose this as otherwise each
 746            tdb1_wipe_all() in a transaction will increase the size of
 747            the tdb by the size of the recovery area */
 748         if (tdb1_ofs_read(tdb, TDB1_RECOVERY_HEAD, &recovery_head) == -1) {
 749                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 750                            "tdb1_wipe_all: failed to read recovery head");
 751                 goto failed;
 752         }
 753
 754         if (recovery_head != 0) {
 755                 struct tdb1_record rec;
 756                 if (tdb->tdb1.io->tdb1_read(tdb, recovery_head, &rec, sizeof(rec), TDB1_DOCONV()) == -1) {
 757                         tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 758                                    "tdb1_wipe_all: failed to read recovery record");
 759                         return -1;
 760                 }
 761                 recovery_size = rec.rec_len + sizeof(rec);
 762         }
 763
 764         /* wipe the hashes */
 765         for (i=0;i<tdb->tdb1.header.hash_size;i++) {
 766                 if (tdb1_ofs_write(tdb, TDB1_HASH_TOP(i), &offset) == -1) {
 767                         tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 768                                    "tdb1_wipe_all: failed to write hash %d", i);
 769                         goto failed;
 770                 }
 771         }
 772
 773         /* wipe the freelist */
 774         if (tdb1_ofs_write(tdb, TDB1_FREELIST_TOP, &offset) == -1) {
 775                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 776                            "tdb1_wipe_all: failed to write freelist");
 777                 goto failed;
 778         }
 779
 780         /* add all the rest of the file to the freelist, possibly leaving a gap
 781            for the recovery area */
 782         if (recovery_size == 0) {
 783                 /* the simple case - the whole file can be used as a freelist */
 784                 data_len = (tdb->file->map_size - TDB1_DATA_START(tdb->tdb1.header.hash_size));
 785                 if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
 786                         goto failed;
 787                 }
 788         } else {
 789                 /* we need to add two freelist entries - one on either
 790                    side of the recovery area
 791
 792                    Note that we cannot shift the recovery area during
 793                    this operation. Only the transaction.c code may
 794                    move the recovery area or we risk subtle data
 795                    corruption
 796                 */
 797                 data_len = (recovery_head - TDB1_DATA_START(tdb->tdb1.header.hash_size));
 798                 if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
 799                         goto failed;
 800                 }
 801                 /* and the 2nd free list entry after the recovery area - if any */
 802                 data_len = tdb->file->map_size - (recovery_head+recovery_size);
 803                 if (tdb1_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
 804                         goto failed;
 805                 }
 806         }
 807
 808         tdb1_increment_seqnum_nonblock(tdb);
 809         tdb_unlockall(tdb);
 810         return 0;
 811
 812 failed:
 813         tdb_unlockall(tdb);
 814         return -1;
 815 }
 816
 817 /* Even on files, we can get partial writes due to signals. */
 818 bool tdb1_write_all(int fd, const void *buf, size_t count)
 819 {
 820         while (count) {
 821                 ssize_t ret;
 822                 ret = write(fd, buf, count);
 823                 if (ret < 0)
 824                         return false;
 825                 buf = (const char *)buf + ret;
 826                 count -= ret;
 827         }
 828         return true;
 829 }