git.ozlabs.org Git - ccan/blob - ccan/tdb2/tdb1_tdb.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library
   5
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 3 of the License, or (at your option) any later version.
  18
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, see <http://www.gnu.org/licenses/>.
  26 */
  27
  28 #include "tdb1_private.h"
  29 #include <assert.h>
  30
  31 TDB_DATA tdb1_null;
  32
  33 /*
  34   non-blocking increment of the tdb sequence number if the tdb has been opened using
  35   the TDB_SEQNUM flag
  36 */
  37 void tdb1_increment_seqnum_nonblock(struct tdb_context *tdb)
  38 {
  39         tdb1_off_t seqnum=0;
  40
  41         if (!(tdb->flags & TDB_SEQNUM)) {
  42                 return;
  43         }
  44
  45         /* we ignore errors from this, as we have no sane way of
  46            dealing with them.
  47         */
  48         tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
  49         seqnum++;
  50         tdb1_ofs_write(tdb, TDB1_SEQNUM_OFS, &seqnum);
  51 }
  52
  53 /*
  54   increment the tdb sequence number if the tdb has been opened using
  55   the TDB_SEQNUM flag
  56 */
  57 static void tdb1_increment_seqnum(struct tdb_context *tdb)
  58 {
  59         if (!(tdb->flags & TDB_SEQNUM)) {
  60                 return;
  61         }
  62
  63         if (tdb1_nest_lock(tdb, TDB1_SEQNUM_OFS, F_WRLCK,
  64                            TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
  65                 return;
  66         }
  67
  68         tdb1_increment_seqnum_nonblock(tdb);
  69
  70         tdb1_nest_unlock(tdb, TDB1_SEQNUM_OFS, F_WRLCK);
  71 }
  72
  73 static int tdb1_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
  74 {
  75         return memcmp(data.dptr, key.dptr, data.dsize);
  76 }
  77
  78 /* Returns 0 on fail.  On success, return offset of record, and fills
  79    in rec */
  80 static tdb1_off_t tdb1_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
  81                         struct tdb1_record *r)
  82 {
  83         tdb1_off_t rec_ptr;
  84
  85         /* read in the hash top */
  86         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
  87                 return 0;
  88
  89         /* keep looking until we find the right record */
  90         while (rec_ptr) {
  91                 if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
  92                         return 0;
  93
  94                 if (!TDB1_DEAD(r) && hash==r->full_hash
  95                     && key.dsize==r->key_len
  96                     && tdb1_parse_data(tdb, key, rec_ptr + sizeof(*r),
  97                                       r->key_len, tdb1_key_compare,
  98                                       NULL) == 0) {
  99                         return rec_ptr;
 100                 }
 101                 /* detect tight infinite loop */
 102                 if (rec_ptr == r->next) {
 103                         tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT,
 104                                                 TDB_LOG_ERROR,
 105                                                 "tdb1_find: loop detected.");
 106                         return 0;
 107                 }
 108                 rec_ptr = r->next;
 109         }
 110         tdb->last_error = TDB_ERR_NOEXIST;
 111         return 0;
 112 }
 113
 114 /* As tdb1_find, but if you succeed, keep the lock */
 115 tdb1_off_t tdb1_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
 116                            struct tdb1_record *rec)
 117 {
 118         uint32_t rec_ptr;
 119
 120         if (tdb1_lock(tdb, TDB1_BUCKET(hash), locktype) == -1)
 121                 return 0;
 122         if (!(rec_ptr = tdb1_find(tdb, key, hash, rec)))
 123                 tdb1_unlock(tdb, TDB1_BUCKET(hash), locktype);
 124         return rec_ptr;
 125 }
 126
 127 static TDB_DATA _tdb1_fetch(struct tdb_context *tdb, TDB_DATA key);
 128
 129 /* update an entry in place - this only works if the new data size
 130    is <= the old data size and the key exists.
 131    on failure return -1.
 132 */
 133 static int tdb1_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
 134 {
 135         struct tdb1_record rec;
 136         tdb1_off_t rec_ptr;
 137
 138         /* find entry */
 139         if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec)))
 140                 return -1;
 141
 142         /* it could be an exact duplicate of what is there - this is
 143          * surprisingly common (eg. with a ldb re-index). */
 144         if (rec.key_len == key.dsize &&
 145             rec.data_len == dbuf.dsize &&
 146             rec.full_hash == hash) {
 147                 TDB_DATA data = _tdb1_fetch(tdb, key);
 148                 if (data.dsize == dbuf.dsize &&
 149                     memcmp(data.dptr, dbuf.dptr, data.dsize) == 0) {
 150                         if (data.dptr) {
 151                                 free(data.dptr);
 152                         }
 153                         return 0;
 154                 }
 155                 if (data.dptr) {
 156                         free(data.dptr);
 157                 }
 158         }
 159
 160         /* must be long enough key, data and tailer */
 161         if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb1_off_t)) {
 162                 tdb->last_error = TDB_SUCCESS; /* Not really an error */
 163                 return -1;
 164         }
 165
 166         if (tdb->tdb1.io->tdb1_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 167                       dbuf.dptr, dbuf.dsize) == -1)
 168                 return -1;
 169
 170         if (dbuf.dsize != rec.data_len) {
 171                 /* update size */
 172                 rec.data_len = dbuf.dsize;
 173                 return tdb1_rec_write(tdb, rec_ptr, &rec);
 174         }
 175
 176         return 0;
 177 }
 178
 179 /* find an entry in the database given a key */
 180 /* If an entry doesn't exist tdb1_err will be set to
 181  * TDB_ERR_NOEXIST. If a key has no data attached
 182  * then the TDB_DATA will have zero length but
 183  * a non-zero pointer
 184  */
 185 static TDB_DATA _tdb1_fetch(struct tdb_context *tdb, TDB_DATA key)
 186 {
 187         tdb1_off_t rec_ptr;
 188         struct tdb1_record rec;
 189         TDB_DATA ret;
 190         uint32_t hash;
 191
 192         /* find which hash bucket it is in */
 193         hash = tdb_hash(tdb, key.dptr, key.dsize);
 194         if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
 195                 return tdb1_null;
 196
 197         ret.dptr = tdb1_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
 198                                   rec.data_len);
 199         ret.dsize = rec.data_len;
 200         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 201         return ret;
 202 }
 203
 204 TDB_DATA tdb1_fetch(struct tdb_context *tdb, TDB_DATA key)
 205 {
 206         TDB_DATA ret = _tdb1_fetch(tdb, key);
 207
 208         return ret;
 209 }
 210
 211 /*
 212  * Find an entry in the database and hand the record's data to a parsing
 213  * function. The parsing function is executed under the chain read lock, so it
 214  * should be fast and should not block on other syscalls.
 215  *
 216  * DON'T CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
 217  *
 218  * For mmapped tdb's that do not have a transaction open it points the parsing
 219  * function directly at the mmap area, it avoids the malloc/memcpy in this
 220  * case. If a transaction is open or no mmap is available, it has to do
 221  * malloc/read/parse/free.
 222  *
 223  * This is interesting for all readers of potentially large data structures in
 224  * the tdb records, ldb indexes being one example.
 225  *
 226  * Return -1 if the record was not found.
 227  */
 228
 229 int tdb1_parse_record(struct tdb_context *tdb, TDB_DATA key,
 230                      int (*parser)(TDB_DATA key, TDB_DATA data,
 231                                    void *private_data),
 232                      void *private_data)
 233 {
 234         tdb1_off_t rec_ptr;
 235         struct tdb1_record rec;
 236         int ret;
 237         uint32_t hash;
 238
 239         /* find which hash bucket it is in */
 240         hash = tdb_hash(tdb, key.dptr, key.dsize);
 241
 242         if (!(rec_ptr = tdb1_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
 243                 /* record not found */
 244                 tdb->last_error = TDB_ERR_NOEXIST;
 245                 return -1;
 246         }
 247
 248         ret = tdb1_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
 249                              rec.data_len, parser, private_data);
 250
 251         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 252
 253         return ret;
 254 }
 255
 256 /* check if an entry in the database exists
 257
 258    note that 1 is returned if the key is found and 0 is returned if not found
 259    this doesn't match the conventions in the rest of this module, but is
 260    compatible with gdbm
 261 */
 262 static int tdb1_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 263 {
 264         struct tdb1_record rec;
 265
 266         if (tdb1_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
 267                 return 0;
 268         tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_RDLCK);
 269         return 1;
 270 }
 271
 272 int tdb1_exists(struct tdb_context *tdb, TDB_DATA key)
 273 {
 274         uint32_t hash = tdb_hash(tdb, key.dptr, key.dsize);
 275         int ret;
 276
 277         ret = tdb1_exists_hash(tdb, key, hash);
 278         return ret;
 279 }
 280
 281 /* actually delete an entry in the database given the offset */
 282 int tdb1_do_delete(struct tdb_context *tdb, tdb1_off_t rec_ptr, struct tdb1_record *rec)
 283 {
 284         tdb1_off_t last_ptr, i;
 285         struct tdb1_record lastrec;
 286
 287         if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) return -1;
 288
 289         if (((tdb->tdb1.traverse_write != 0) && (!TDB1_DEAD(rec))) ||
 290             tdb1_write_lock_record(tdb, rec_ptr) == -1) {
 291                 /* Someone traversing here: mark it as dead */
 292                 rec->magic = TDB1_DEAD_MAGIC;
 293                 return tdb1_rec_write(tdb, rec_ptr, rec);
 294         }
 295         if (tdb1_write_unlock_record(tdb, rec_ptr) != 0)
 296                 return -1;
 297
 298         /* find previous record in hash chain */
 299         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(rec->full_hash), &i) == -1)
 300                 return -1;
 301         for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
 302                 if (tdb1_rec_read(tdb, i, &lastrec) == -1)
 303                         return -1;
 304
 305         /* unlink it: next ptr is at start of record. */
 306         if (last_ptr == 0)
 307                 last_ptr = TDB1_HASH_TOP(rec->full_hash);
 308         if (tdb1_ofs_write(tdb, last_ptr, &rec->next) == -1)
 309                 return -1;
 310
 311         /* recover the space */
 312         if (tdb1_free(tdb, rec_ptr, rec) == -1)
 313                 return -1;
 314         return 0;
 315 }
 316
 317 static int tdb1_count_dead(struct tdb_context *tdb, uint32_t hash)
 318 {
 319         int res = 0;
 320         tdb1_off_t rec_ptr;
 321         struct tdb1_record rec;
 322
 323         /* read in the hash top */
 324         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 325                 return 0;
 326
 327         while (rec_ptr) {
 328                 if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1)
 329                         return 0;
 330
 331                 if (rec.magic == TDB1_DEAD_MAGIC) {
 332                         res += 1;
 333                 }
 334                 rec_ptr = rec.next;
 335         }
 336         return res;
 337 }
 338
 339 /*
 340  * Purge all DEAD records from a hash chain
 341  */
 342 static int tdb1_purge_dead(struct tdb_context *tdb, uint32_t hash)
 343 {
 344         int res = -1;
 345         struct tdb1_record rec;
 346         tdb1_off_t rec_ptr;
 347
 348         if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
 349                 return -1;
 350         }
 351
 352         /* read in the hash top */
 353         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 354                 goto fail;
 355
 356         while (rec_ptr) {
 357                 tdb1_off_t next;
 358
 359                 if (tdb1_rec_read(tdb, rec_ptr, &rec) == -1) {
 360                         goto fail;
 361                 }
 362
 363                 next = rec.next;
 364
 365                 if (rec.magic == TDB1_DEAD_MAGIC
 366                     && tdb1_do_delete(tdb, rec_ptr, &rec) == -1) {
 367                         goto fail;
 368                 }
 369                 rec_ptr = next;
 370         }
 371         res = 0;
 372  fail:
 373         tdb1_unlock(tdb, -1, F_WRLCK);
 374         return res;
 375 }
 376
 377 /* delete an entry in the database given a key */
 378 static int tdb1_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
 379 {
 380         tdb1_off_t rec_ptr;
 381         struct tdb1_record rec;
 382         int ret;
 383
 384         if (tdb->tdb1.max_dead_records != 0) {
 385
 386                 /*
 387                  * Allow for some dead records per hash chain, mainly for
 388                  * tdb's with a very high create/delete rate like locking.tdb.
 389                  */
 390
 391                 if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 392                         return -1;
 393
 394                 if (tdb1_count_dead(tdb, hash) >= tdb->tdb1.max_dead_records) {
 395                         /*
 396                          * Don't let the per-chain freelist grow too large,
 397                          * delete all existing dead records
 398                          */
 399                         tdb1_purge_dead(tdb, hash);
 400                 }
 401
 402                 if (!(rec_ptr = tdb1_find(tdb, key, hash, &rec))) {
 403                         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 404                         return -1;
 405                 }
 406
 407                 /*
 408                  * Just mark the record as dead.
 409                  */
 410                 rec.magic = TDB1_DEAD_MAGIC;
 411                 ret = tdb1_rec_write(tdb, rec_ptr, &rec);
 412         }
 413         else {
 414                 if (!(rec_ptr = tdb1_find_lock_hash(tdb, key, hash, F_WRLCK,
 415                                                    &rec)))
 416                         return -1;
 417
 418                 ret = tdb1_do_delete(tdb, rec_ptr, &rec);
 419         }
 420
 421         if (ret == 0) {
 422                 tdb1_increment_seqnum(tdb);
 423         }
 424
 425         if (tdb1_unlock(tdb, TDB1_BUCKET(rec.full_hash), F_WRLCK) != 0)
 426                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 427                            "tdb1_delete: WARNING tdb1_unlock failed!");
 428         return ret;
 429 }
 430
 431 int tdb1_delete(struct tdb_context *tdb, TDB_DATA key)
 432 {
 433         uint32_t hash = tdb_hash(tdb, key.dptr, key.dsize);
 434         int ret;
 435
 436         ret = tdb1_delete_hash(tdb, key, hash);
 437         return ret;
 438 }
 439
 440 /*
 441  * See if we have a dead record around with enough space
 442  */
 443 static tdb1_off_t tdb1_find_dead(struct tdb_context *tdb, uint32_t hash,
 444                                struct tdb1_record *r, tdb1_len_t length)
 445 {
 446         tdb1_off_t rec_ptr;
 447
 448         /* read in the hash top */
 449         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1)
 450                 return 0;
 451
 452         /* keep looking until we find the right record */
 453         while (rec_ptr) {
 454                 if (tdb1_rec_read(tdb, rec_ptr, r) == -1)
 455                         return 0;
 456
 457                 if (TDB1_DEAD(r) && r->rec_len >= length) {
 458                         /*
 459                          * First fit for simple coding, TODO: change to best
 460                          * fit
 461                          */
 462                         return rec_ptr;
 463                 }
 464                 rec_ptr = r->next;
 465         }
 466         return 0;
 467 }
 468
 469 static int _tdb1_store(struct tdb_context *tdb, TDB_DATA key,
 470                        TDB_DATA dbuf, int flag, uint32_t hash)
 471 {
 472         struct tdb1_record rec;
 473         tdb1_off_t rec_ptr;
 474         char *p = NULL;
 475         int ret = -1;
 476
 477         /* check for it existing, on insert. */
 478         if (flag == TDB_INSERT) {
 479                 if (tdb1_exists_hash(tdb, key, hash)) {
 480                         tdb->last_error = TDB_ERR_EXISTS;
 481                         goto fail;
 482                 }
 483         } else {
 484                 /* first try in-place update, on modify or replace. */
 485                 if (tdb1_update_hash(tdb, key, hash, dbuf) == 0) {
 486                         goto done;
 487                 }
 488                 if (tdb->last_error == TDB_ERR_NOEXIST &&
 489                     flag == TDB_MODIFY) {
 490                         /* if the record doesn't exist and we are in TDB1_MODIFY mode then
 491                          we should fail the store */
 492                         goto fail;
 493                 }
 494         }
 495         /* reset the error code potentially set by the tdb1_update() */
 496         tdb->last_error = TDB_SUCCESS;
 497
 498         /* delete any existing record - if it doesn't exist we don't
 499            care.  Doing this first reduces fragmentation, and avoids
 500            coalescing with `allocated' block before it's updated. */
 501         if (flag != TDB_INSERT)
 502                 tdb1_delete_hash(tdb, key, hash);
 503
 504         /* Copy key+value *before* allocating free space in case malloc
 505            fails and we are left with a dead spot in the tdb. */
 506
 507         if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
 508                 tdb->last_error = TDB_ERR_OOM;
 509                 goto fail;
 510         }
 511
 512         memcpy(p, key.dptr, key.dsize);
 513         if (dbuf.dsize)
 514                 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
 515
 516         if (tdb->tdb1.max_dead_records != 0) {
 517                 /*
 518                  * Allow for some dead records per hash chain, look if we can
 519                  * find one that can hold the new record. We need enough space
 520                  * for key, data and tailer. If we find one, we don't have to
 521                  * consult the central freelist.
 522                  */
 523                 rec_ptr = tdb1_find_dead(
 524                         tdb, hash, &rec,
 525                         key.dsize + dbuf.dsize + sizeof(tdb1_off_t));
 526
 527                 if (rec_ptr != 0) {
 528                         rec.key_len = key.dsize;
 529                         rec.data_len = dbuf.dsize;
 530                         rec.full_hash = hash;
 531                         rec.magic = TDB1_MAGIC;
 532                         if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
 533                             || tdb->tdb1.io->tdb1_write(
 534                                     tdb, rec_ptr + sizeof(rec),
 535                                     p, key.dsize + dbuf.dsize) == -1) {
 536                                 goto fail;
 537                         }
 538                         goto done;
 539                 }
 540         }
 541
 542         /*
 543          * We have to allocate some space from the freelist, so this means we
 544          * have to lock it. Use the chance to purge all the DEAD records from
 545          * the hash chain under the freelist lock.
 546          */
 547
 548         if (tdb1_lock(tdb, -1, F_WRLCK) == -1) {
 549                 goto fail;
 550         }
 551
 552         if ((tdb->tdb1.max_dead_records != 0)
 553             && (tdb1_purge_dead(tdb, hash) == -1)) {
 554                 tdb1_unlock(tdb, -1, F_WRLCK);
 555                 goto fail;
 556         }
 557
 558         /* we have to allocate some space */
 559         rec_ptr = tdb1_allocate(tdb, key.dsize + dbuf.dsize, &rec);
 560
 561         tdb1_unlock(tdb, -1, F_WRLCK);
 562
 563         if (rec_ptr == 0) {
 564                 goto fail;
 565         }
 566
 567         /* Read hash top into next ptr */
 568         if (tdb1_ofs_read(tdb, TDB1_HASH_TOP(hash), &rec.next) == -1)
 569                 goto fail;
 570
 571         rec.key_len = key.dsize;
 572         rec.data_len = dbuf.dsize;
 573         rec.full_hash = hash;
 574         rec.magic = TDB1_MAGIC;
 575
 576         /* write out and point the top of the hash chain at it */
 577         if (tdb1_rec_write(tdb, rec_ptr, &rec) == -1
 578             || tdb->tdb1.io->tdb1_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
 579             || tdb1_ofs_write(tdb, TDB1_HASH_TOP(hash), &rec_ptr) == -1) {
 580                 /* Need to tdb1_unallocate() here */
 581                 goto fail;
 582         }
 583
 584  done:
 585         ret = 0;
 586  fail:
 587         if (ret == 0) {
 588                 tdb1_increment_seqnum(tdb);
 589         }
 590
 591         SAFE_FREE(p);
 592         return ret;
 593 }
 594
 595 /* store an element in the database, replacing any existing element
 596    with the same key
 597
 598    return 0 on success, -1 on failure
 599 */
 600 int tdb1_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
 601 {
 602         uint32_t hash;
 603         int ret;
 604
 605         assert(tdb->flags & TDB_VERSION1);
 606
 607         if ((tdb->flags & TDB_RDONLY) || tdb->tdb1.traverse_read) {
 608                 tdb->last_error = TDB_ERR_RDONLY;
 609                 return -1;
 610         }
 611
 612         /* find which hash bucket it is in */
 613         hash = tdb_hash(tdb, key.dptr, key.dsize);
 614         if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 615                 return -1;
 616
 617         ret = _tdb1_store(tdb, key, dbuf, flag, hash);
 618         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 619         return ret;
 620 }
 621
 622 /* Append to an entry. Create if not exist. */
 623 int tdb1_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
 624 {
 625         uint32_t hash;
 626         TDB_DATA dbuf;
 627         int ret = -1;
 628
 629         /* find which hash bucket it is in */
 630         hash = tdb_hash(tdb, key.dptr, key.dsize);
 631         if (tdb1_lock(tdb, TDB1_BUCKET(hash), F_WRLCK) == -1)
 632                 return -1;
 633
 634         dbuf = _tdb1_fetch(tdb, key);
 635
 636         if (dbuf.dptr == NULL) {
 637                 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
 638         } else {
 639                 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
 640                 unsigned char *new_dptr;
 641
 642                 /* realloc '0' is special: don't do that. */
 643                 if (new_len == 0)
 644                         new_len = 1;
 645                 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
 646                 if (new_dptr == NULL) {
 647                         free(dbuf.dptr);
 648                 }
 649                 dbuf.dptr = new_dptr;
 650         }
 651
 652         if (dbuf.dptr == NULL) {
 653                 tdb->last_error = TDB_ERR_OOM;
 654                 goto failed;
 655         }
 656
 657         memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
 658         dbuf.dsize += new_dbuf.dsize;
 659
 660         ret = _tdb1_store(tdb, key, dbuf, 0, hash);
 661
 662 failed:
 663         tdb1_unlock(tdb, TDB1_BUCKET(hash), F_WRLCK);
 664         SAFE_FREE(dbuf.dptr);
 665         return ret;
 666 }
 667
 668
 669 /*
 670   get the tdb sequence number. Only makes sense if the writers opened
 671   with TDB1_SEQNUM set. Note that this sequence number will wrap quite
 672   quickly, so it should only be used for a 'has something changed'
 673   test, not for code that relies on the count of the number of changes
 674   made. If you want a counter then use a tdb record.
 675
 676   The aim of this sequence number is to allow for a very lightweight
 677   test of a possible tdb change.
 678 */
 679 int tdb1_get_seqnum(struct tdb_context *tdb)
 680 {
 681         tdb1_off_t seqnum=0;
 682
 683         tdb1_ofs_read(tdb, TDB1_SEQNUM_OFS, &seqnum);
 684         return seqnum;
 685 }
 686
 687
 688 /*
 689   add a region of the file to the freelist. Length is the size of the region in bytes,
 690   which includes the free list header that needs to be added
 691  */
 692 static int tdb1_free_region(struct tdb_context *tdb, tdb1_off_t offset, ssize_t length)
 693 {
 694         struct tdb1_record rec;
 695         if (length <= sizeof(rec)) {
 696                 /* the region is not worth adding */
 697                 return 0;
 698         }
 699         if (length + offset > tdb->file->map_size) {
 700                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
 701                                         "tdb1_free_region: adding region beyond"
 702                                         " end of file");
 703                 return -1;
 704         }
 705         memset(&rec,'\0',sizeof(rec));
 706         rec.rec_len = length - sizeof(rec);
 707         if (tdb1_free(tdb, offset, &rec) == -1) {
 708                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 709                            "tdb1_free_region: failed to add free record");
 710                 return -1;
 711         }
 712         return 0;
 713 }
 714
 715 /*
 716   wipe the entire database, deleting all records. This can be done
 717   very fast by using a allrecord lock. The entire data portion of the
 718   file becomes a single entry in the freelist.
 719
 720   This code carefully steps around the recovery area, leaving it alone
 721  */
 722 int tdb1_wipe_all(struct tdb_context *tdb)
 723 {
 724         int i;
 725         tdb1_off_t offset = 0;
 726         ssize_t data_len;
 727         tdb1_off_t recovery_head;
 728         tdb1_len_t recovery_size = 0;
 729
 730         if (tdb1_lockall(tdb) != 0) {
 731                 return -1;
 732         }
 733
 734
 735         /* see if the tdb has a recovery area, and remember its size
 736            if so. We don't want to lose this as otherwise each
 737            tdb1_wipe_all() in a transaction will increase the size of
 738            the tdb by the size of the recovery area */
 739         if (tdb1_ofs_read(tdb, TDB1_RECOVERY_HEAD, &recovery_head) == -1) {
 740                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 741                            "tdb1_wipe_all: failed to read recovery head");
 742                 goto failed;
 743         }
 744
 745         if (recovery_head != 0) {
 746                 struct tdb1_record rec;
 747                 if (tdb->tdb1.io->tdb1_read(tdb, recovery_head, &rec, sizeof(rec), TDB1_DOCONV()) == -1) {
 748                         tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 749                                    "tdb1_wipe_all: failed to read recovery record");
 750                         return -1;
 751                 }
 752                 recovery_size = rec.rec_len + sizeof(rec);
 753         }
 754
 755         /* wipe the hashes */
 756         for (i=0;i<tdb->tdb1.header.hash_size;i++) {
 757                 if (tdb1_ofs_write(tdb, TDB1_HASH_TOP(i), &offset) == -1) {
 758                         tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 759                                    "tdb1_wipe_all: failed to write hash %d", i);
 760                         goto failed;
 761                 }
 762         }
 763
 764         /* wipe the freelist */
 765         if (tdb1_ofs_write(tdb, TDB1_FREELIST_TOP, &offset) == -1) {
 766                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 767                            "tdb1_wipe_all: failed to write freelist");
 768                 goto failed;
 769         }
 770
 771         /* add all the rest of the file to the freelist, possibly leaving a gap
 772            for the recovery area */
 773         if (recovery_size == 0) {
 774                 /* the simple case - the whole file can be used as a freelist */
 775                 data_len = (tdb->file->map_size - TDB1_DATA_START(tdb->tdb1.header.hash_size));
 776                 if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
 777                         goto failed;
 778                 }
 779         } else {
 780                 /* we need to add two freelist entries - one on either
 781                    side of the recovery area
 782
 783                    Note that we cannot shift the recovery area during
 784                    this operation. Only the transaction.c code may
 785                    move the recovery area or we risk subtle data
 786                    corruption
 787                 */
 788                 data_len = (recovery_head - TDB1_DATA_START(tdb->tdb1.header.hash_size));
 789                 if (tdb1_free_region(tdb, TDB1_DATA_START(tdb->tdb1.header.hash_size), data_len) != 0) {
 790                         goto failed;
 791                 }
 792                 /* and the 2nd free list entry after the recovery area - if any */
 793                 data_len = tdb->file->map_size - (recovery_head+recovery_size);
 794                 if (tdb1_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
 795                         goto failed;
 796                 }
 797         }
 798
 799         if (tdb1_unlockall(tdb) != 0) {
 800                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 801                            "tdb1_wipe_all: failed to unlock");
 802                 goto failed;
 803         }
 804
 805         return 0;
 806
 807 failed:
 808         tdb1_unlockall(tdb);
 809         return -1;
 810 }
 811
 812 struct traverse_state {
 813         enum TDB_ERROR error;
 814         struct tdb_context *dest_db;
 815 };
 816
 817 /*
 818   traverse function for repacking
 819  */
 820 static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private_data)
 821 {
 822         struct traverse_state *state = (struct traverse_state *)private_data;
 823         if (tdb1_store(state->dest_db, key, data, TDB_INSERT) != 0) {
 824                 state->error = state->dest_db->last_error;
 825                 return -1;
 826         }
 827         return 0;
 828 }
 829
 830 /*
 831   repack a tdb
 832  */
 833 int tdb1_repack(struct tdb_context *tdb)
 834 {
 835         struct tdb_context *tmp_db;
 836         struct traverse_state state;
 837         union tdb_attribute hsize;
 838
 839         hsize.base.attr = TDB_ATTRIBUTE_TDB1_HASHSIZE;
 840         hsize.base.next = NULL;
 841         hsize.tdb1_hashsize.hsize = tdb->tdb1.header.hash_size;
 842
 843         if (tdb1_transaction_start(tdb) != 0) {
 844                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 845                            __location__ " Failed to start transaction");
 846                 return -1;
 847         }
 848
 849         tmp_db = tdb_open("tmpdb", TDB_INTERNAL, O_RDWR|O_CREAT, 0, &hsize);
 850         if (tmp_db == NULL) {
 851                 tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
 852                                         __location__ " Failed to create tmp_db");
 853                 tdb1_transaction_cancel(tdb);
 854                 return -1;
 855         }
 856
 857         state.error = TDB_SUCCESS;
 858         state.dest_db = tmp_db;
 859
 860         if (tdb1_traverse_read(tdb, repack_traverse, &state) == -1) {
 861                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 862                            __location__ " Failed to traverse copying out");
 863                 tdb1_transaction_cancel(tdb);
 864                 tdb_close(tmp_db);
 865                 return -1;
 866         }
 867
 868         if (state.error != TDB_SUCCESS) {
 869                 tdb->last_error = tdb_logerr(tdb, state.error, TDB_LOG_ERROR,
 870                                         __location__ " Error during traversal");
 871                 tdb1_transaction_cancel(tdb);
 872                 tdb_close(tmp_db);
 873                 return -1;
 874         }
 875
 876         if (tdb1_wipe_all(tdb) != 0) {
 877                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 878                            __location__ " Failed to wipe database\n");
 879                 tdb1_transaction_cancel(tdb);
 880                 tdb_close(tmp_db);
 881                 return -1;
 882         }
 883
 884         state.error = TDB_SUCCESS;
 885         state.dest_db = tdb;
 886
 887         if (tdb1_traverse_read(tmp_db, repack_traverse, &state) == -1) {
 888                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 889                            __location__ " Failed to traverse copying back");
 890                 tdb1_transaction_cancel(tdb);
 891                 tdb_close(tmp_db);
 892                 return -1;
 893         }
 894
 895         if (state.error) {
 896                 tdb->last_error = tdb_logerr(tdb, state.error, TDB_LOG_ERROR,
 897                                         __location__ " Error during second traversal");
 898                 tdb1_transaction_cancel(tdb);
 899                 tdb_close(tmp_db);
 900                 return -1;
 901         }
 902
 903         tdb_close(tmp_db);
 904
 905         if (tdb1_transaction_commit(tdb) != 0) {
 906                 tdb_logerr(tdb, tdb->last_error, TDB_LOG_ERROR,
 907                            __location__ " Failed to commit");
 908                 return -1;
 909         }
 910
 911         return 0;
 912 }
 913
 914 /* Even on files, we can get partial writes due to signals. */
 915 bool tdb1_write_all(int fd, const void *buf, size_t count)
 916 {
 917         while (count) {
 918                 ssize_t ret;
 919                 ret = write(fd, buf, count);
 920                 if (ret < 0)
 921                         return false;
 922                 buf = (const char *)buf + ret;
 923                 count -= ret;
 924         }
 925         return true;
 926 }