git.ozlabs.org Git - ccan/blob - ccan/tdb2/tdb1_tdb.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library
   5
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 3 of the License, or (at your option) any later version.
  18
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, see <http://www.gnu.org/licenses/>.
  26 */
  27
  28 #include "tdb1_private.h"
  29 #include <assert.h>
  30
  31 /*
  32   non-blocking increment of the tdb sequence number if the tdb has been opened using
  33   the TDB_SEQNUM flag
  34 */
  35 void tdb1_increment_seqnum_nonblock(struct tdb_context *tdb)
  36 {
  37         tdb1_off_t seqnum=0;
  38
  39         if (!(tdb->flags & TDB_SEQNUM)) {
  40                 return;
  41         }
  42
  43         /* we ignore errors from this, as we have no sane way of
  44            dealing with them.
  45         */
  46         tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
  47         seqnum++;
  48         tdb1_ofs_write(tdb, TDB1_SEQNUM_OFS, &seqnum);
  49 }
  50
  51 /*
  52   increment the tdb sequence number if the tdb has been opened using
  53   the TDB_SEQNUM flag
  54 */
  55 static void tdb1_increment_seqnum(struct tdb_context *tdb)
  56 {
  57         if (!(tdb->flags & TDB_SEQNUM)) {
  58                 return;
  59         }
  60
  61         if (tdb1_nest_lock(tdb, TDB1_SEQNUM_OFS, F_WRLCK,
  62                            TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
  63                 return;
  64         }
  65
  66         tdb1_increment_seqnum_nonblock(tdb);
  67
  68         tdb1_nest_unlock(tdb, TDB1_SEQNUM_OFS, F_WRLCK);
  69 }
  70
  71 static enum TDB_ERROR tdb1_key_compare(TDB_DATA key, TDB_DATA data,
  72                                        void *matches_)
  73 {
  74         bool *matches = matches_;
  75         *matches = (memcmp(data.dptr, key.dptr, data.dsize) == 0);
  76         return TDB_SUCCESS;
  77 }
  78
  79 /* Returns 0 on fail; last_error will be TDB_ERR_NOEXIST if it simply
  80  * wasn't there, otherwise a real error.
  81  * On success, return offset of record, and fills in rec */
  82 static tdb1_off_t tdb1_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
  83                         struct tdb1_record *r)
  84 {
  85         tdb1_off_t rec_ptr;
  86
  87         /* read in the hash top */
  88         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
  89                 return 0;
  90
  91         /* keep looking until we find the right record */
  92         while (rec_ptr) {
  93                 if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
  94                         return 0;
  95
  96                 tdb->stats.compares++;
  97                 if (TDB1_DEAD(r)) {
  98                         tdb->stats.compare_wrong_bucket++;
  99                 } else if (key.dsize != r->key_len) {
 100                         tdb->stats.compare_wrong_keylen++;
 101                 } else if (hash != r->full_hash) {
 102                         tdb->stats.compare_wrong_rechash++;
 103                 } else {
 104                         enum TDB_ERROR ecode;
 105                         bool matches;
 106                         ecode = tdb1_parse_data(tdb, key, rec_ptr + sizeof(*r),
 107                                                 r->key_len, tdb1_key_compare,
 108                                                 &matches);
 109
 110                         if (ecode != TDB_SUCCESS) {
 111                                 tdb->last_error = ecode;
 112                                 return 0;
 113                         }
 114
 115                         if (!matches) {
 116                                 tdb->stats.compare_wrong_keycmp++;
 117                         } else {
 118                                 return rec_ptr;
 119                         }
 120                 }
 121                 /* detect tight infinite loop */
 122                 if (rec_ptr == r->next) {
 123                         tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT,
 124                                                 TDB_LOG_ERROR,
 125                                                 "tdb1_find: loop detected.");
 126                         return 0;
 127                 }
 128                 rec_ptr = r->next;
 129         }
 130         tdb->last_error = TDB_ERR_NOEXIST;
 131         return 0;
 132 }
 133
 134 /* As tdb1_find, but if you succeed, keep the lock */
 135 tdb1_off_t tdb1_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
 136                            struct tdb1_record *rec)
 137 {
 138         uint32_t rec_ptr;
 139
 140         if (tdb1_lock(tdb, TDB1_BUCKET(hash), locktype) == -1)
 141                 return 0;
 142         if (!(rec_ptr = tdb1_find(tdb, key, hash, rec)))
 143                 tdb1_unlock(tdb, TDB1_BUCKET(hash), locktype);
 144         return rec_ptr;
 145 }
 146
 147 static TDB_DATA _tdb1_fetch(struct tdb_context *tdb, TDB_DATA key);
 148
 149 /* update an entry in place - this only works if the new data size
 150    is <= the old data size and the key exists.
 151    on failure return -1.
 152 */
 153 static int tdb1_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
 154 {
 155         struct tdb1_record rec;
 156         tdb1_off_t rec_ptr;
 157
 158         /* find entry */
 159         if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec)))
 160                 return -1;
 161
 162         /* it could be an exact duplicate of what is there - this is
 163          * surprisingly common (eg. with a ldb re-index). */
 164         if (rec.key_len == key.dsize &&
 165             rec.data_len == dbuf.dsize &&
 166             rec.full_hash == hash) {
 167                 TDB_DATA data = _tdb1_fetch(tdb, key);
 168                 if (data.dsize == dbuf.dsize &&
 169                     memcmp(data.dptr, dbuf.dptr, data.dsize) == 0) {
 170                         if (data.dptr) {
 171                                 free(data.dptr);
 172                         }
 173                         return 0;
 174                 }
 175                 if (data.dptr) {
 176                         free(data.dptr);
 177                 }
 178         }
 179
 180         /* must be long enough key, data and tailer */
 181         if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb1_off_t)) {
 182                 tdb->last_error = TDB_SUCCESS; /* Not really an error */
 183                 return -1;
 184         }
 185
 186         if (tdb->tdb1.io->tdb1_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 187                       dbuf.dptr, dbuf.dsize) == -1)
 188                 return -1;
 189
 190         if (dbuf.dsize != rec.data_len) {
 191                 /* update size */
 192                 rec.data_len = dbuf.dsize;
 193                 return tdb1_rec_write(tdb, rec_ptr, &rec);
 194         }
 195
 196         return 0;
 197 }
 198
 199 /* find an entry in the database given a key */
 200 /* If an entry doesn't exist tdb1_err will be set to
 201  * TDB_ERR_NOEXIST. If a key has no data attached
 202  * then the TDB_DATA will have zero length but
 203  * a non-zero pointer
 204  */
 205 static TDB_DATA _tdb1_fetch(struct tdb_context *tdb, TDB_DATA key)
 206 {
 207         tdb1_off_t rec_ptr;
 208         struct tdb1_record rec;
 209         TDB_DATA ret;
 210         uint32_t hash;
 211
 212         /* find which hash bucket it is in */
 213         hash = tdb_hash(tdb, key.dptr, key.dsize);
 214         if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 215                 ret.dptr = NULL;
 216                 ret.dsize = 0;
 217                 return ret;
 218         }
 219
 220         ret.dptr = tdb1_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 221                                   rec.data_len);
 222         ret.dsize = rec.data_len;
 223         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 224         return ret;
 225 }
 226
 227 enum TDB_ERROR tdb1_fetch(struct tdb_context *tdb, TDB_DATA key, TDB_DATA *data)
 228 {
 229         *data = _tdb1_fetch(tdb, key);
 230         if (data->dptr == NULL)
 231                 return tdb->last_error;
 232         return TDB_SUCCESS;
 233 }
 234
 235 enum TDB_ERROR tdb1_parse_record(struct tdb_context *tdb, TDB_DATA key,
 236                                  enum TDB_ERROR (*parser)(TDB_DATA key,
 237                                                           TDB_DATA data,
 238                                                           void *private_data),
 239                                  void *private_data)
 240 {
 241         tdb1_off_t rec_ptr;
 242         struct tdb1_record rec;
 243         enum TDB_ERROR ret;
 244         uint32_t hash;
 245
 246         /* find which hash bucket it is in */
 247         hash = tdb_hash(tdb, key.dptr, key.dsize);
 248
 249         if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 250                 return tdb->last_error;
 251         }
 252
 253         ret = tdb1_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
 254                              rec.data_len, parser, private_data);
 255
 256         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 257
 258         return ret;
 259 }
 260
 261 /* check if an entry in the database exists
 262
 263    note that 1 is returned if the key is found and 0 is returned if not found
 264    this doesn't match the conventions in the rest of this module, but is
 265    compatible with gdbm
 266 */
 267 static int tdb1_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 268 {
 269         struct tdb1_record rec;
 270
 271         if (tdb1_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
 272                 return 0;
 273         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 274         return 1;
 275 }
 276
 277 int tdb1_exists(struct tdb_context *tdb, TDB_DATA key)
 278 {
 279         uint32_t hash = tdb_hash(tdb, key.dptr, key.dsize);
 280         int ret;
 281
 282         assert(tdb->flags & TDB_VERSION1);
 283         ret = tdb1_exists_hash(tdb, key, hash);
 284         return ret;
 285 }
 286
 287 /* actually delete an entry in the database given the offset */
 288 int tdb1_do_delete(struct tdb_context *tdb, tdb1_off_t rec_ptr, struct tdb1_record *rec)
 289 {
 290         tdb1_off_t last_ptr, i;
 291         struct tdb1_record lastrec;
 292
 293         if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) return -1;
 294
 295         if (((tdb->tdb1.traverse_write != 0) && (!TDB1_DEAD(rec))) ||
 296             tdb1_write_lock_record(tdb, rec_ptr) == -1) {
 297                 /* Someone traversing here: mark it as dead */
 298                 rec->magic = TDB1_DEAD_MAGIC;
 299                 return tdb1_rec_write(tdb, rec_ptr, rec);
 300         }
 301         if (tdb1_write_unlock_record(tdb, rec_ptr) != 0)
 302                 return -1;
 303
 304         /* find previous record in hash chain */
 305         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(rec->full_hash), &i) == -1)
 306                 return -1;
 307         for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
 308                 if (tdb1_rec_read(tdb, i, &lastrec) == -1)
 309                         return -1;
 310
 311         /* unlink it: next ptr is at start of record. */
 312         if (last_ptr == 0)
 313                 last_ptr = TDB1_HASH_TOP(rec->full_hash);
 314         if (tdb1_ofs_write(tdb, last_ptr, &rec->next) == -1)
 315                 return -1;
 316
 317         /* recover the space */
 318         if (tdb1_free(tdb, rec_ptr, rec) == -1)
 319                 return -1;
 320         return 0;
 321 }
 322
 323 static int tdb1_count_dead(struct tdb_context *tdb, uint32_t hash)
 324 {
 325         int res = 0;
 326         tdb1_off_t rec_ptr;
 327         struct tdb1_record rec;
 328
 329         /* read in the hash top */
 330         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 331                 return 0;
 332
 333         while (rec_ptr) {
 334                 if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1)
 335                         return 0;
 336
 337                 if (rec.magic == TDB1_DEAD_MAGIC) {
 338                         res += 1;
 339                 }
 340                 rec_ptr = rec.next;
 341         }
 342         return res;
 343 }
 344
 345 /*
 346  * Purge all DEAD records from a hash chain
 347  */
 348 static int tdb1_purge_dead(struct tdb_context *tdb, uint32_t hash)
 349 {
 350         int res = -1;
 351         struct tdb1_record rec;
 352         tdb1_off_t rec_ptr;
 353
 354         if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
 355                 return -1;
 356         }
 357
 358         /* read in the hash top */
 359         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 360                 goto fail;
 361
 362         while (rec_ptr) {
 363                 tdb1_off_t next;
 364
 365                 if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1) {
 366                         goto fail;
 367                 }
 368
 369                 next = rec.next;
 370
 371                 if (rec.magic == TDB1_DEAD_MAGIC
 372                     && tdb1_do_delete(tdb, rec_ptr, &rec) == -1) {
 373                         goto fail;
 374                 }
 375                 rec_ptr = next;
 376         }
 377         res = 0;
 378  fail:
 379         tdb1_unlock(tdb, -1, F_WRLCK);
 380         return res;
 381 }
 382
 383 /* delete an entry in the database given a key */
 384 static int tdb1_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 385 {
 386         tdb1_off_t rec_ptr;
 387         struct tdb1_record rec;
 388         int ret;
 389
 390         if (tdb->tdb1.max_dead_records != 0) {
 391
 392                 /*
 393                  * Allow for some dead records per hash chain, mainly for
 394                  * tdb's with a very high create/delete rate like locking.tdb.
 395                  */
 396
 397                 if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 398                         return -1;
 399
 400                 if (tdb1_count_dead(tdb, hash) >= tdb->tdb1.max_dead_records) {
 401                         /*
 402                          * Don't let the per-chain freelist grow too large,
 403                          * delete all existing dead records
 404                          */
 405                         tdb1_purge_dead(tdb, hash);
 406                 }
 407
 408                 if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec))) {
 409                         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 410                         return -1;
 411                 }
 412
 413                 /*
 414                  * Just mark the record as dead.
 415                  */
 416                 rec.magic = TDB1_DEAD_MAGIC;
 417                 ret = tdb1_rec_write(tdb, rec_ptr, &rec);
 418         }
 419         else {
 420                 if (!(rec_ptr = tdb1_find_lock_hash(tdb, key, hash, F_WRLCK,
 421                                                    &rec)))
 422                         return -1;
 423
 424                 ret = tdb1_do_delete(tdb, rec_ptr, &rec);
 425         }
 426
 427         if (ret == 0) {
 428                 tdb1_increment_seqnum(tdb);
 429         }
 430
 431         if (tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_WRLCK) != 0)
 432                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 433                            "tdb1_delete: WARNING tdb1_unlock failed!");
 434         return ret;
 435 }
 436
 437 int tdb1_delete(struct tdb_context *tdb, TDB_DATA key)
 438 {
 439         uint32_t hash = tdb_hash(tdb, key.dptr, key.dsize);
 440         int ret;
 441
 442         assert(tdb->flags & TDB_VERSION1);
 443         ret = tdb1_delete_hash(tdb, key, hash);
 444         return ret;
 445 }
 446
 447 /*
 448  * See if we have a dead record around with enough space
 449  */
 450 static tdb1_off_t tdb1_find_dead(struct tdb_context *tdb, uint32_t hash,
 451                                struct tdb1_record *r, tdb1_len_t length)
 452 {
 453         tdb1_off_t rec_ptr;
 454
 455         /* read in the hash top */
 456         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 457                 return 0;
 458
 459         /* keep looking until we find the right record */
 460         while (rec_ptr) {
 461                 if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
 462                         return 0;
 463
 464                 if (TDB1_DEAD(r) && r->rec_len >= length) {
 465                         /*
 466                          * First fit for simple coding, TODO: change to best
 467                          * fit
 468                          */
 469                         return rec_ptr;
 470                 }
 471                 rec_ptr = r->next;
 472         }
 473         return 0;
 474 }
 475
 476 static int _tdb1_store(struct tdb_context *tdb, TDB_DATA key,
 477                        TDB_DATA dbuf, int flag, uint32_t hash)
 478 {
 479         struct tdb1_record rec;
 480         tdb1_off_t rec_ptr;
 481         char *p = NULL;
 482         int ret = -1;
 483
 484         /* check for it existing, on insert. */
 485         if (flag == TDB_INSERT) {
 486                 if (tdb1_exists_hash(tdb, key, hash)) {
 487                         tdb->last_error = TDB_ERR_EXISTS;
 488                         goto fail;
 489                 }
 490                 if (tdb->last_error != TDB_ERR_NOEXIST) {
 491                         goto fail;
 492                 }
 493         } else {
 494                 /* first try in-place update, on modify or replace. */
 495                 if (tdb1_update_hash(tdb, key, hash, dbuf) == 0) {
 496                         goto done;
 497                 }
 498                 if (tdb->last_error != TDB_SUCCESS) {
 499                         if (tdb->last_error != TDB_ERR_NOEXIST) {
 500                                 goto fail;
 501                         }
 502                         if (flag == TDB_MODIFY) {
 503                                 /* if the record doesn't exist and we are in TDB1_MODIFY mode then
 504                                    we should fail the store */
 505                                 goto fail;
 506                         }
 507                 }
 508         }
 509         /* reset the error code potentially set by the tdb1_update() */
 510         tdb->last_error = TDB_SUCCESS;
 511
 512         /* delete any existing record - if it doesn't exist we don't
 513            care.  Doing this first reduces fragmentation, and avoids
 514            coalescing with `allocated' block before it's updated. */
 515         if (flag != TDB_INSERT)
 516                 tdb1_delete_hash(tdb, key, hash);
 517
 518         /* Copy key+value *before* allocating free space in case malloc
 519            fails and we are left with a dead spot in the tdb. */
 520
 521         if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
 522                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
 523                                              "tdb1_store: out of memory"
 524                                              " allocating copy");
 525                 goto fail;
 526         }
 527
 528         memcpy(p, key.dptr, key.dsize);
 529         if (dbuf.dsize)
 530                 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
 531
 532         if (tdb->tdb1.max_dead_records != 0) {
 533                 /*
 534                  * Allow for some dead records per hash chain, look if we can
 535                  * find one that can hold the new record. We need enough space
 536                  * for key, data and tailer. If we find one, we don't have to
 537                  * consult the central freelist.
 538                  */
 539                 rec_ptr = tdb1_find_dead(
 540                         tdb, hash, &rec,
 541                         key.dsize + dbuf.dsize + sizeof(tdb1_off_t));
 542
 543                 if (rec_ptr != 0) {
 544                         rec.key_len = key.dsize;
 545                         rec.data_len = dbuf.dsize;
 546                         rec.full_hash = hash;
 547                         rec.magic = TDB1_MAGIC;
 548                         if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
 549                             || tdb->tdb1.io->tdb1_write(
 550                                     tdb, rec_ptr + sizeof(rec),
 551                                     p, key.dsize + dbuf.dsize) == -1) {
 552                                 goto fail;
 553                         }
 554                         goto done;
 555                 }
 556         }
 557
 558         /*
 559          * We have to allocate some space from the freelist, so this means we
 560          * have to lock it. Use the chance to purge all the DEAD records from
 561          * the hash chain under the freelist lock.
 562          */
 563
 564         if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
 565                 goto fail;
 566         }
 567
 568         if ((tdb->tdb1.max_dead_records != 0)
 569             && (tdb1_purge_dead(tdb, hash) == -1)) {
 570                 tdb1_unlock(tdb, -1, F_WRLCK);
 571                 goto fail;
 572         }
 573
 574         /* we have to allocate some space */
 575         rec_ptr = tdb1_allocate(tdb, key.dsize + dbuf.dsize, &rec);
 576
 577         tdb1_unlock(tdb, -1, F_WRLCK);
 578
 579         if (rec_ptr == 0) {
 580                 goto fail;
 581         }
 582
 583         /* Read hash top into next ptr */
 584         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec.next) == -1)
 585                 goto fail;
 586
 587         rec.key_len = key.dsize;
 588         rec.data_len = dbuf.dsize;
 589         rec.full_hash = hash;
 590         rec.magic = TDB1_MAGIC;
 591
 592         /* write out and point the top of the hash chain at it */
 593         if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
 594             || tdb->tdb1.io->tdb1_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
 595             || tdb1_ofs_write(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1) {
 596                 /* Need to tdb1_unallocate() here */
 597                 goto fail;
 598         }
 599
 600  done:
 601         ret = 0;
 602  fail:
 603         if (ret == 0) {
 604                 tdb1_increment_seqnum(tdb);
 605         }
 606
 607         SAFE_FREE(p);
 608         return ret;
 609 }
 610
 611 /* store an element in the database, replacing any existing element
 612    with the same key
 613
 614    return 0 on success, -1 on failure
 615 */
 616 int tdb1_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 617 {
 618         uint32_t hash;
 619         int ret;
 620
 621         assert(tdb->flags & TDB_VERSION1);
 622
 623         if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
 624                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_RDONLY,
 625                                              TDB_LOG_USE_ERROR,
 626                                              "tdb_store: read-only tdb");
 627                 return -1;
 628         }
 629
 630         /* find which hash bucket it is in */
 631         hash = tdb_hash(tdb, key.dptr, key.dsize);
 632         if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 633                 return -1;
 634
 635         ret = _tdb1_store(tdb, key, dbuf, flag, hash);
 636         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 637         return ret;
 638 }
 639
 640 /* Append to an entry. Create if not exist. */
 641 int tdb1_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
 642 {
 643         uint32_t hash;
 644         TDB_DATA dbuf;
 645         int ret = -1;
 646
 647         assert(tdb->flags & TDB_VERSION1);
 648
 649         /* find which hash bucket it is in */
 650         hash = tdb_hash(tdb, key.dptr, key.dsize);
 651         if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 652                 return -1;
 653
 654         dbuf = _tdb1_fetch(tdb, key);
 655
 656         if (dbuf.dptr == NULL) {
 657                 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
 658         } else {
 659                 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
 660                 unsigned char *new_dptr;
 661
 662                 /* realloc '0' is special: don't do that. */
 663                 if (new_len == 0)
 664                         new_len = 1;
 665                 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
 666                 if (new_dptr == NULL) {
 667                         free(dbuf.dptr);
 668                 }
 669                 dbuf.dptr = new_dptr;
 670         }
 671
 672         if (dbuf.dptr == NULL) {
 673                 tdb->last_error = TDB_ERR_OOM;
 674                 goto failed;
 675         }
 676
 677         memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
 678         dbuf.dsize += new_dbuf.dsize;
 679
 680         ret = _tdb1_store(tdb, key, dbuf, 0, hash);
 681
 682 failed:
 683         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 684         SAFE_FREE(dbuf.dptr);
 685         return ret;
 686 }
 687
 688
 689 /*
 690   get the tdb sequence number. Only makes sense if the writers opened
 691   with TDB1_SEQNUM set. Note that this sequence number will wrap quite
 692   quickly, so it should only be used for a 'has something changed'
 693   test, not for code that relies on the count of the number of changes
 694   made. If you want a counter then use a tdb record.
 695
 696   The aim of this sequence number is to allow for a very lightweight
 697   test of a possible tdb change.
 698 */
 699 int tdb1_get_seqnum(struct tdb_context *tdb)
 700 {
 701         tdb1_off_t seqnum=0;
 702
 703         tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
 704         return seqnum;
 705 }
 706
 707
 708 /*
 709   add a region of the file to the freelist. Length is the size of the region in bytes,
 710   which includes the free list header that needs to be added
 711  */
 712 static int tdb1_free_region(struct tdb_context *tdb, tdb1_off_t offset, ssize_t length)
 713 {
 714         struct tdb1_record rec;
 715         if (length <= sizeof(rec)) {
 716                 /* the region is not worth adding */
 717                 return 0;
 718         }
 719         if (length + offset > tdb->file->map_size) {
 720                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
 721                                         "tdb1_free_region: adding region beyond"
 722                                         " end of file");
 723                 return -1;
 724         }
 725         memset(&rec,'\0',sizeof(rec));
 726         rec.rec_len = length - sizeof(rec);
 727         if (tdb1_free(tdb, offset, &rec) == -1) {
 728                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 729                            "tdb1_free_region: failed to add free record");
 730                 return -1;
 731         }
 732         return 0;
 733 }
 734
 735 /*
 736   wipe the entire database, deleting all records. This can be done
 737   very fast by using a allrecord lock. The entire data portion of the
 738   file becomes a single entry in the freelist.
 739
 740   This code carefully steps around the recovery area, leaving it alone
 741  */
 742 int tdb1_wipe_all(struct tdb_context *tdb)
 743 {
 744         int i;
 745         tdb1_off_t offset = 0;
 746         ssize_t data_len;
 747         tdb1_off_t recovery_head;
 748         tdb1_len_t recovery_size = 0;
 749
 750         if (tdb_lockall(tdb) != TDB_SUCCESS) {
 751                 return -1;
 752         }
 753
 754
 755         /* see if the tdb has a recovery area, and remember its size
 756            if so. We don't want to lose this as otherwise each
 757            tdb1_wipe_all() in a transaction will increase the size of
 758            the tdb by the size of the recovery area */
 759         if (tdb1_ofs_read(tdb, TDB1_RECOVERY_HEAD, &recovery_head) == -1) {
 760                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 761                            "tdb1_wipe_all: failed to read recovery head");
 762                 goto failed;
 763         }
 764
 765         if (recovery_head != 0) {
 766                 struct tdb1_record rec;
 767                 if (tdb->tdb1.io->tdb1_read(tdb, recovery_head, &rec, sizeof(rec), TDB1_DOCONV()) == -1) {
 768                         tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 769                                    "tdb1_wipe_all: failed to read recovery record");
 770                         return -1;
 771                 }
 772                 recovery_size = rec.rec_len + sizeof(rec);
 773         }
 774
 775         /* wipe the hashes */
 776         for (i=0;i<tdb->tdb1.header.hash_size;i++) {
 777                 if (tdb1_ofs_write(tdb, TDB1_HASH_TOP(i), &offset) == -1) {
 778                         tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 779                                    "tdb1_wipe_all: failed to write hash %d", i);
 780                         goto failed;
 781                 }
 782         }
 783
 784         /* wipe the freelist */
 785         if (tdb1_ofs_write(tdb, TDB1_FREELIST_TOP, &offset) == -1) {
 786                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 787                            "tdb1_wipe_all: failed to write freelist");
 788                 goto failed;
 789         }
 790
 791         /* add all the rest of the file to the freelist, possibly leaving a gap
 792            for the recovery area */
 793         if (recovery_size == 0) {
 794                 /* the simple case - the whole file can be used as a freelist */
 795                 data_len = (tdb->file->map_size - TDB1_DATA_START(tdb->tdb1.header.hash_size));
 796                 if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
 797                         goto failed;
 798                 }
 799         } else {
 800                 /* we need to add two freelist entries - one on either
 801                    side of the recovery area
 802
 803                    Note that we cannot shift the recovery area during
 804                    this operation. Only the transaction.c code may
 805                    move the recovery area or we risk subtle data
 806                    corruption
 807                 */
 808                 data_len = (recovery_head - TDB1_DATA_START(tdb->tdb1.header.hash_size));
 809                 if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
 810                         goto failed;
 811                 }
 812                 /* and the 2nd free list entry after the recovery area - if any */
 813                 data_len = tdb->file->map_size - (recovery_head+recovery_size);
 814                 if (tdb1_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
 815                         goto failed;
 816                 }
 817         }
 818
 819         tdb1_increment_seqnum_nonblock(tdb);
 820         tdb_unlockall(tdb);
 821         return 0;
 822
 823 failed:
 824         tdb_unlockall(tdb);
 825         return -1;
 826 }
 827
 828 /* Even on files, we can get partial writes due to signals. */
 829 bool tdb1_write_all(int fd, const void *buf, size_t count)
 830 {
 831         while (count) {
 832                 ssize_t ret;
 833                 ret = write(fd, buf, count);
 834                 if (ret < 0)
 835                         return false;
 836                 buf = (const char *)buf + ret;
 837                 count -= ret;
 838         }
 839         return true;
 840 }