2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "tdb_private.h"
33 non-blocking increment of the tdb sequence number if the tdb has been opened using
// Updates the sequence number word stored at TDB_SEQNUM_OFS; no lock call is
// visible in this function.
// Visible steps: test TDB_SEQNUM in tdb->flags, read the current value into
// seqnum with tdb_ofs_read, then write seqnum back with tdb_ofs_write.
// The results of tdb_ofs_read and tdb_ofs_write are intentionally unchecked.
// NOTE(review): the declaration of seqnum, the body of the flag test and the
// statement that changes seqnum between the read and the write are not
// visible in this listing - presumably an early return and an increment,
// TODO confirm against the full file.
36 void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
40 if (!(tdb->flags & TDB_SEQNUM)) {
44 /* we ignore errors from this, as we have no sane way of
47 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
49 tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
53 increment the tdb sequence number if the tdb has been opened using
// Locked wrapper around tdb_increment_seqnum_nonblock.
// Visible steps: test TDB_SEQNUM in tdb->flags, take a write lock on
// TDB_SEQNUM_OFS with tdb_nest_lock (flags TDB_LOCK_WAIT|TDB_LOCK_PROBE),
// call tdb_increment_seqnum_nonblock, release the lock with tdb_nest_unlock.
// NOTE(review): the bodies of both if statements are not visible in this
// listing; presumably each returns without touching the sequence number.
56 static void tdb_increment_seqnum(struct tdb_context *tdb)
58 if (!(tdb->flags & TDB_SEQNUM)) {
62 if (tdb_nest_lock(tdb, TDB_SEQNUM_OFS, F_WRLCK,
63 TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
67 tdb_increment_seqnum_nonblock(tdb);
69 tdb_nest_unlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, false);
// Comparison callback handed to tdb_parse_data by tdb_find.
// Compares the first data.dsize bytes of data.dptr and key.dptr with memcmp
// and returns the memcmp result, so 0 means the bytes are equal.
// private_data is not used.
// NOTE(review): key.dsize is not consulted here; the visible caller in
// tdb_find only reaches this after testing key.dsize == r->key_len.
72 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
74 return memcmp(data.dptr, key.dptr, data.dsize);
77 /* Returns 0 on fail. On success, return offset of record, and fills
// Searches the hash chain selected by hash for a live record matching key.
// Visible steps:
//   - read the chain head from TDB_HASH_TOP(hash) into rec_ptr;
//   - read the record at rec_ptr into r with tdb_rec_read;
//   - accept a record only if it is not dead, r->full_hash equals hash,
//     r->key_len equals key.dsize and tdb_parse_data with tdb_key_compare
//     confirms the stored key bytes;
//   - treat a record whose next pointer equals its own offset as corruption:
//     set tdb->ecode to TDB_ERR_CORRUPT and log a fatal message;
//   - set tdb->ecode to TDB_ERR_NOEXIST in the last visible statement.
// NOTE(review): the rest of the parameter list, the loop construct, the
// advance to r->next and all return statements are not visible in this
// listing. Per the comment above, 0 is returned on failure and the record
// offset on success.
79 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
84 /* read in the hash top */
85 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
88 /* keep looking until we find the right record */
90 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
93 if (!TDB_DEAD(r) && hash==r->full_hash
94 && key.dsize==r->key_len
95 && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
96 r->key_len, tdb_key_compare,
100 /* detect tight infinite loop */
101 if (rec_ptr == r->next) {
102 tdb->ecode = TDB_ERR_CORRUPT;
103 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_find: loop detected.\n"));
108 tdb->ecode = TDB_ERR_NOEXIST;
112 /* As tdb_find, but if you succeed, keep the lock */
// Lookup of key under the chain lock for BUCKET(hash).
// Takes the lock with tdb_lock using the caller supplied locktype, then calls
// tdb_find. When tdb_find yields 0 the lock is released again with tdb_unlock;
// on a non-zero result no unlock is visible, so the caller is left holding
// the chain lock and has to release it.
// NOTE(review): the branch taken when tdb_lock fails and the return
// statements are not visible in this listing.
113 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
114 struct tdb_record *rec)
118 if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
120 if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
121 tdb_unlock(tdb, BUCKET(hash), locktype);
// Forward declaration: tdb_update_hash calls _tdb_fetch before the point
// where _tdb_fetch is defined.
125 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key);
127 /* update an entry in place - this only works if the new data size
128 is <= the old data size and the key exists.
129 on failure return -1.
// Attempts to overwrite the data of an existing record in place.
// Visible steps:
//   - locate the record with tdb_find (no locking is done here);
//   - if key length, data length and hash all match, fetch the stored data
//     with _tdb_fetch and compare it with dbuf to detect an exact duplicate;
//   - if rec.rec_len is smaller than key + data + one tdb_off_t tailer, set
//     tdb->ecode to TDB_SUCCESS because a record that does not fit is not
//     treated as an error;
//   - write dbuf directly after the stored key via tdb->methods->tdb_write;
//   - when the data length changed, store the new rec.data_len and rewrite
//     the record header with tdb_rec_write.
// NOTE(review): _tdb_fetch assigns dsize from rec.data_len with no visible
// check of its allocation, so data.dptr may be NULL with a non-zero dsize
// when memcmp is reached - confirm. The release of data.dptr and every
// return other than the final one are not visible in this listing.
131 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
133 struct tdb_record rec;
137 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
140 /* it could be an exact duplicate of what is there - this is
141 * surprisingly common (eg. with a ldb re-index). */
142 if (rec.key_len == key.dsize &&
143 rec.data_len == dbuf.dsize &&
144 rec.full_hash == hash) {
145 TDB_DATA data = _tdb_fetch(tdb, key);
146 if (data.dsize == dbuf.dsize &&
147 memcmp(data.dptr, dbuf.dptr, data.dsize) == 0) {
159 /* must be long enough key, data and tailer */
160 if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
161 tdb->ecode = TDB_SUCCESS; /* Not really an error */
165 if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
166 dbuf.dptr, dbuf.dsize) == -1)
169 if (dbuf.dsize != rec.data_len) {
171 rec.data_len = dbuf.dsize;
172 return tdb_rec_write(tdb, rec_ptr, &rec);
178 /* find an entry in the database given a key */
179 /* If an entry doesn't exist tdb_err will be set to
180 * TDB_ERR_NOEXIST. If a key has no data attached
181 * then the TDB_DATA will have zero length but
// Internal fetch without tracing.
// Hashes key with tdb->hash_fn, looks the record up under a read lock with
// tdb_find_lock_hash, reads the record data with tdb_alloc_read starting at
// rec_ptr + sizeof(rec) + rec.key_len, sets ret.dsize to rec.data_len and
// releases the chain lock.
// The returned dptr is heap memory owned by the caller: tdb_append passes it
// to realloc and SAFE_FREE.
// NOTE(review): ret.dsize is assigned with no visible check that
// tdb_alloc_read succeeded, so a result with dptr NULL and dsize non-zero
// looks possible - confirm. The not-found return value is not visible here.
184 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
187 struct tdb_record rec;
191 /* find which hash bucket it is in */
192 hash = tdb->hash_fn(&key);
193 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
196 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
198 ret.dsize = rec.data_len;
199 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
// Public fetch entry point: delegates to _tdb_fetch and records the key and
// the result with tdb_trace_1rec_retrec.
// NOTE(review): the return statement is not visible in this listing;
// presumably ret is returned unchanged.
203 TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
205 TDB_DATA ret = _tdb_fetch(tdb, key);
207 tdb_trace_1rec_retrec(tdb, "tdb_fetch", key, ret);
212 * Find an entry in the database and hand the record's data to a parsing
213 * function. The parsing function is executed under the chain read lock, so it
214 * should be fast and should not block on other syscalls.
216 * DO NOT CALL OTHER TDB FUNCTIONS FROM THE PARSER; THIS MIGHT LEAD TO SEGFAULTS.
218 * For mmapped tdb's that do not have a transaction open it points the parsing
219 * function directly at the mmap area, it avoids the malloc/memcpy in this
220 * case. If a transaction is open or no mmap is available, it has to do
221 * malloc/read/parse/free.
223 * This is interesting for all readers of potentially large data structures in
224 * the tdb records, ldb indexes being one example.
// Looks key up and hands the record data to parser while the chain read lock
// is held.
// Visible steps:
//   - hash key with tdb->hash_fn and look it up with tdb_find_lock_hash
//     using F_RDLCK;
//   - on a failed lookup, trace -1 and set tdb->ecode to TDB_ERR_NOEXIST;
//   - on success, trace 0, call tdb_parse_data on the data area
//     (rec_ptr + sizeof(rec) + rec.key_len, length rec.data_len) with parser
//     and private_data, then release the chain lock.
// NOTE(review): the trace entry is written with 0 before parser runs, so it
// does not reflect the value stored in ret. The remainder of the parameter
// list and the return statements are not visible in this listing.
227 int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
228 int (*parser)(TDB_DATA key, TDB_DATA data,
233 struct tdb_record rec;
237 /* find which hash bucket it is in */
238 hash = tdb->hash_fn(&key);
240 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
241 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, -1);
242 tdb->ecode = TDB_ERR_NOEXIST;
245 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, 0);
247 ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
248 rec.data_len, parser, private_data);
250 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
255 /* check if an entry in the database exists
257 note that 1 is returned if the key is found and 0 is returned if not found
258 this doesn't match the conventions in the rest of this module, but is
// Existence test for key with a precomputed hash.
// Looks the key up with tdb_find_lock_hash under F_RDLCK; after a successful
// lookup the chain lock is released again with tdb_unlock.
// NOTE(review): the return statements are not visible in this listing. Per
// the comment above, 1 is returned when the key is found and 0 otherwise.
261 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
263 struct tdb_record rec;
265 if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
267 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
// Public existence test: hashes key with tdb->hash_fn, delegates to
// tdb_exists_hash and records the key and result with tdb_trace_1rec_ret.
// NOTE(review): the declaration of ret and the return statement are not
// visible in this listing.
271 int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
273 uint32_t hash = tdb->hash_fn(&key);
276 ret = tdb_exists_hash(tdb, key, hash);
277 tdb_trace_1rec_ret(tdb, "tdb_exists", key, ret);
281 /* actually delete an entry in the database given the offset */
// Removes the record at rec_ptr from its hash chain and frees its space.
// Visible steps:
//   - refuse with -1 when the tdb is read_only or inside a read traverse;
//   - if a write traverse is active and the record is not already dead, or
//     tdb_write_lock_record fails, only mark the record dead by storing
//     TDB_DEAD_MAGIC and rewriting it with tdb_rec_write;
//   - otherwise drop the record lock with tdb_write_unlock_record;
//   - read the chain head at TDB_HASH_TOP(rec->full_hash) and follow next
//     pointers until reaching rec_ptr, keeping the predecessor in last_ptr;
//   - write rec->next over the link that pointed at this record;
//   - hand the space back with tdb_free.
// NOTE(review): the condition guarding the assignment of
// TDB_HASH_TOP(rec->full_hash) to last_ptr is not visible; presumably it
// applies when the record is first in the chain (last_ptr still 0). The for
// loop has no visible guard for i reaching 0 before rec_ptr is found and
// seems to rely on tdb_rec_read failing - confirm. Failure branches and the
// final return are not visible in this listing.
282 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct tdb_record *rec)
284 tdb_off_t last_ptr, i;
285 struct tdb_record lastrec;
287 if (tdb->read_only || tdb->traverse_read) return -1;
289 if (((tdb->traverse_write != 0) && (!TDB_DEAD(rec))) ||
290 tdb_write_lock_record(tdb, rec_ptr) == -1) {
291 /* Someone traversing here: mark it as dead */
292 rec->magic = TDB_DEAD_MAGIC;
293 return tdb_rec_write(tdb, rec_ptr, rec);
295 if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
298 /* find previous record in hash chain */
299 if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
301 for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
302 if (tdb_rec_read(tdb, i, &lastrec) == -1)
305 /* unlink it: next ptr is at start of record. */
307 last_ptr = TDB_HASH_TOP(rec->full_hash);
308 if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
311 /* recover the space */
312 if (tdb_free(tdb, rec_ptr, rec) == -1)
// Inspects the hash chain for hash and tests each record for TDB_DEAD_MAGIC;
// the callers compare the result against tdb->max_dead_records.
// Visible steps: read the chain head from TDB_HASH_TOP(hash), read each
// record with tdb_rec_read, test rec.magic against TDB_DEAD_MAGIC.
// NOTE(review): the counter, the loop construct and the return statements
// are not visible in this listing; presumably the number of dead records is
// returned.
317 static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash)
321 struct tdb_record rec;
323 /* read in the hash top */
324 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
328 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
331 if (rec.magic == TDB_DEAD_MAGIC) {
340 * Purge all DEAD records from a hash chain
// Deletes every dead record on the hash chain for hash.
// Visible steps: take a write lock on list -1 with tdb_lock (the same lock
// _tdb_store takes before allocating from the freelist), read the chain head
// from TDB_HASH_TOP(hash), read each record with tdb_rec_read, call
// tdb_do_delete on records whose magic is TDB_DEAD_MAGIC, and release the
// lock on list -1.
// NOTE(review): the loop construct, the error paths and the return value are
// not visible in this listing.
342 static int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
345 struct tdb_record rec;
348 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
352 /* read in the hash top */
353 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
359 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
365 if (rec.magic == TDB_DEAD_MAGIC
366 && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
373 tdb_unlock(tdb, -1, F_WRLCK);
377 /* delete an entry in the database given a key */
// Deletes the record for key, given its precomputed hash.
// Two strategies are visible:
//   - when tdb->max_dead_records is non-zero: lock the chain for writing,
//     purge the chain with tdb_purge_dead once tdb_count_dead reaches
//     max_dead_records, find the record with tdb_find and only mark it dead
//     by writing TDB_DEAD_MAGIC with tdb_rec_write;
//   - otherwise: find the record with tdb_find_lock_hash under F_WRLCK and
//     remove it with tdb_do_delete.
// Afterwards tdb_increment_seqnum is called and the chain lock is released;
// a failing unlock is only logged as a warning.
// NOTE(review): the return value of tdb_purge_dead is ignored. The condition
// guarding tdb_increment_seqnum, the else keyword and the return statements
// are not visible in this listing.
378 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
381 struct tdb_record rec;
384 if (tdb->max_dead_records != 0) {
387 * Allow for some dead records per hash chain, mainly for
388 * tdb's with a very high create/delete rate like locking.tdb.
391 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
394 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
396 * Don't let the per-chain freelist grow too large,
397 * delete all existing dead records
399 tdb_purge_dead(tdb, hash);
402 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {
403 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
408 * Just mark the record as dead.
410 rec.magic = TDB_DEAD_MAGIC;
411 ret = tdb_rec_write(tdb, rec_ptr, &rec);
414 if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK,
418 ret = tdb_do_delete(tdb, rec_ptr, &rec);
422 tdb_increment_seqnum(tdb);
425 if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0)
426 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
// Public delete entry point: hashes key with tdb->hash_fn, delegates to
// tdb_delete_hash and records the key and result with tdb_trace_1rec_ret.
// NOTE(review): the declaration of ret and the return statement are not
// visible in this listing.
430 int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
432 uint32_t hash = tdb->hash_fn(&key);
435 ret = tdb_delete_hash(tdb, key, hash);
436 tdb_trace_1rec_ret(tdb, "tdb_delete", key, ret);
441 * See if we have a dead record around with enough space
// Searches the hash chain for hash for a dead record that can be reused.
// Visible steps: read the chain head from TDB_HASH_TOP(hash), read each
// record into r with tdb_rec_read, and accept the first record that is dead
// and whose rec_len is at least length (first fit).
// NOTE(review): the loop construct and the return statements are not visible
// in this listing; presumably the record offset is returned on a match and 0
// otherwise, since _tdb_store stores the result in rec_ptr.
443 static tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
444 struct tdb_record *r, tdb_len_t length)
448 /* read in the hash top */
449 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
452 /* keep looking until we find the right record */
454 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
457 if (TDB_DEAD(r) && r->rec_len >= length) {
459 * First fit for simple coding, TODO: change to best
// Core store routine used by tdb_store and tdb_append, which both take the
// chain write lock before calling it.
// Visible steps, in order:
//   1. TDB_INSERT: fail with TDB_ERR_EXISTS when tdb_exists_hash finds key.
//   2. Other flags: try an in-place update with tdb_update_hash. If that
//      fails with TDB_ERR_NOEXIST under TDB_MODIFY the store fails.
//   3. Reset tdb->ecode to TDB_SUCCESS, then (unless TDB_INSERT) delete any
//      existing record with tdb_delete_hash, ignoring its result.
//   4. Copy key and data into one malloc buffer p of key.dsize + dbuf.dsize
//      bytes before any space is allocated in the file; TDB_ERR_OOM is set
//      when malloc fails.
//   5. When max_dead_records is non-zero, look for a reusable dead record on
//      the chain with tdb_find_dead (key + data + one tdb_off_t tailer); the
//      visible code then fills rec and rewrites header and payload in place.
//   6. Otherwise lock list -1 for writing, purge dead records when
//      max_dead_records is non-zero, allocate space with tdb_allocate, and
//      unlock list -1.
//   7. Read the chain head into rec.next, fill in rec, write header and
//      payload, and point TDB_HASH_TOP(hash) at the new record.
//   8. Call tdb_increment_seqnum.
// NOTE(review): key.dsize + dbuf.dsize is passed to malloc with no visible
// overflow check. The existing comment near the final write states that the
// allocation should be undone on failure but is not. The declarations of p,
// rec_ptr and ret, the labels and gotos, the release of p and all return
// statements are not visible in this listing.
469 static int _tdb_store(struct tdb_context *tdb, TDB_DATA key,
470 TDB_DATA dbuf, int flag, uint32_t hash)
472 struct tdb_record rec;
477 /* check for it existing, on insert. */
478 if (flag == TDB_INSERT) {
479 if (tdb_exists_hash(tdb, key, hash)) {
480 tdb->ecode = TDB_ERR_EXISTS;
484 /* first try in-place update, on modify or replace. */
485 if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
488 if (tdb->ecode == TDB_ERR_NOEXIST &&
489 flag == TDB_MODIFY) {
490 /* if the record doesn't exist and we are in TDB_MODIFY mode then
491 we should fail the store */
495 /* reset the error code potentially set by the tdb_update() */
496 tdb->ecode = TDB_SUCCESS;
498 /* delete any existing record - if it doesn't exist we don't
499 care. Doing this first reduces fragmentation, and avoids
500 coalescing with `allocated' block before it's updated. */
501 if (flag != TDB_INSERT)
502 tdb_delete_hash(tdb, key, hash);
504 /* Copy key+value *before* allocating free space in case malloc
505 fails and we are left with a dead spot in the tdb. */
507 if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
508 tdb->ecode = TDB_ERR_OOM;
512 memcpy(p, key.dptr, key.dsize);
514 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
516 if (tdb->max_dead_records != 0) {
518 * Allow for some dead records per hash chain, look if we can
519 * find one that can hold the new record. We need enough space
520 * for key, data and tailer. If we find one, we don't have to
521 * consult the central freelist.
523 rec_ptr = tdb_find_dead(
525 key.dsize + dbuf.dsize + sizeof(tdb_off_t));
528 rec.key_len = key.dsize;
529 rec.data_len = dbuf.dsize;
530 rec.full_hash = hash;
531 rec.magic = TDB_MAGIC;
532 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
533 || tdb->methods->tdb_write(
534 tdb, rec_ptr + sizeof(rec),
535 p, key.dsize + dbuf.dsize) == -1) {
543 * We have to allocate some space from the freelist, so this means we
544 * have to lock it. Use the chance to purge all the DEAD records from
545 * the hash chain under the freelist lock.
548 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
552 if ((tdb->max_dead_records != 0)
553 && (tdb_purge_dead(tdb, hash) == -1)) {
554 tdb_unlock(tdb, -1, F_WRLCK);
558 /* we have to allocate some space */
559 rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec);
561 tdb_unlock(tdb, -1, F_WRLCK);
567 /* Read hash top into next ptr */
568 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
571 rec.key_len = key.dsize;
572 rec.data_len = dbuf.dsize;
573 rec.full_hash = hash;
574 rec.magic = TDB_MAGIC;
576 /* write out and point the top of the hash chain at it */
577 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
578 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
579 || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
580 /* Need to tdb_unallocate() here */
588 tdb_increment_seqnum(tdb);
595 /* store an element in the database, replacing any existing element
598 return 0 on success, -1 on failure
// Public store entry point.
// Visible steps:
//   - when the tdb is read_only or inside a read traverse, set tdb->ecode to
//     TDB_ERR_RDONLY and trace the call with result -1;
//   - hash key with tdb->hash_fn and take the chain write lock with tdb_lock;
//   - run _tdb_store, trace the call with its result, release the chain lock.
// Per the comment above, 0 is returned on success and -1 on failure.
// NOTE(review): the branch taken when tdb_lock fails and the return
// statements are not visible in this listing.
600 int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
605 if (tdb->read_only || tdb->traverse_read) {
606 tdb->ecode = TDB_ERR_RDONLY;
607 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, -1);
611 /* find which hash bucket it is in */
612 hash = tdb->hash_fn(&key);
613 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
616 ret = _tdb_store(tdb, key, dbuf, flag, hash);
617 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, ret);
618 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
622 /* Append to an entry. Create if not exist. */
// Appends new_dbuf to the data stored under key, creating the record when it
// does not exist.
// Visible steps:
//   - hash key and take the chain write lock;
//   - fetch the current data with _tdb_fetch;
//   - when no buffer came back, malloc new_dbuf.dsize bytes; otherwise grow
//     the fetched buffer with realloc to the combined length;
//   - set TDB_ERR_OOM when no buffer is available;
//   - copy new_dbuf behind the old data, add its size to dbuf.dsize and store
//     the result with _tdb_store using flag 0;
//   - trace the call, release the chain lock and free the buffer.
// NOTE(review): new_len is unsigned int while dsize is printed with %zu
// elsewhere in this file, so the sum may be truncated where size_t is wider
// than unsigned int - confirm.
// NOTE(review): the malloc branch sizes the buffer from new_dbuf.dsize only,
// but the later memcpy writes at offset dbuf.dsize. If _tdb_fetch can return
// dptr NULL together with a non-zero dsize (failed allocation on an existing
// record), that copy would run past the buffer - confirm.
// NOTE(review): the handling of a zero new_len, the action taken when
// realloc fails, the labels and the return statement are not visible in this
// listing.
623 int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
629 /* find which hash bucket it is in */
630 hash = tdb->hash_fn(&key);
631 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
634 dbuf = _tdb_fetch(tdb, key);
636 if (dbuf.dptr == NULL) {
637 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
639 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
640 unsigned char *new_dptr;
642 /* realloc '0' is special: don't do that. */
645 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
646 if (new_dptr == NULL) {
649 dbuf.dptr = new_dptr;
652 if (dbuf.dptr == NULL) {
653 tdb->ecode = TDB_ERR_OOM;
657 memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
658 dbuf.dsize += new_dbuf.dsize;
660 ret = _tdb_store(tdb, key, dbuf, 0, hash);
661 tdb_trace_2rec_retrec(tdb, "tdb_append", key, new_dbuf, dbuf);
664 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
665 SAFE_FREE(dbuf.dptr);
671 return the name of the current tdb file
672 useful for external logging functions
// Accessor; per the comment above it returns the name of the current tdb
// file. NOTE(review): the body is not visible in this listing.
674 const char *tdb_name(struct tdb_context *tdb)
680 return the underlying file descriptor being used by tdb, or -1
681 useful for external routines that want to check the device/inode
// Accessor; per the comment above it returns the underlying file descriptor
// used by the tdb, or -1. NOTE(review): the body is not visible in this
// listing.
684 int tdb_fd(struct tdb_context *tdb)
690 return the current logging function
691 useful for external tdb routines that wish to log tdb errors
// Returns the logging callback currently stored in tdb->log.log_fn.
693 tdb_log_func tdb_log_fn(struct tdb_context *tdb)
695 return tdb->log.log_fn;
700 get the tdb sequence number. Only makes sense if the writers opened
701 with TDB_SEQNUM set. Note that this sequence number will wrap quite
702 quickly, so it should only be used for a 'has something changed'
703 test, not for code that relies on the count of the number of changes
704 made. If you want a counter then use a tdb record.
706 The aim of this sequence number is to allow for a very lightweight
707 test of a possible tdb change.
// Reads the sequence number word at TDB_SEQNUM_OFS into seqnum with
// tdb_ofs_read; the result of the read is not checked.
// NOTE(review): the declaration of seqnum and the return statement are not
// visible in this listing. The function returns int while tdb_trace_start
// prints the same word with %u - confirm the intended signedness.
709 int tdb_get_seqnum(struct tdb_context *tdb)
713 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
// Returns the hash_size field of the tdb header (tdb->header.hash_size).
717 int tdb_hash_size(struct tdb_context *tdb)
719 return tdb->header.hash_size;
// Returns tdb->map_size, the size value that tdb_free_region and
// tdb_wipe_all use as the end of the usable file area.
722 size_t tdb_map_size(struct tdb_context *tdb)
724 return tdb->map_size;
// Accessor for the tdb flags, judging by its name. NOTE(review): the body is
// not visible in this listing; presumably it returns tdb->flags.
727 int tdb_get_flags(struct tdb_context *tdb)
// Adds flags to the tdb while keeping the two nesting flags exclusive.
// Visible steps:
//   - requesting TDB_ALLOW_NESTING and TDB_DISALLOW_NESTING together sets
//     tdb->ecode to TDB_ERR_NESTING and logs a fatal message;
//   - requesting TDB_ALLOW_NESTING clears TDB_DISALLOW_NESTING;
//   - requesting TDB_DISALLOW_NESTING clears TDB_ALLOW_NESTING.
// NOTE(review): the log message has no trailing newline, unlike the other
// TDB_LOG messages in this file. The early return after the error and the
// statement that actually sets the requested flags are not visible in this
// listing.
732 void tdb_add_flags(struct tdb_context *tdb, unsigned flags)
734 if ((flags & TDB_ALLOW_NESTING) &&
735 (flags & TDB_DISALLOW_NESTING)) {
736 tdb->ecode = TDB_ERR_NESTING;
737 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_add_flags: "
738 "allow_nesting and disallow_nesting are not allowed together!"));
742 if (flags & TDB_ALLOW_NESTING) {
743 tdb->flags &= ~TDB_DISALLOW_NESTING;
745 if (flags & TDB_DISALLOW_NESTING) {
746 tdb->flags &= ~TDB_ALLOW_NESTING;
// Clears flags on the tdb while keeping the two nesting flags exclusive.
// Visible steps:
//   - requesting TDB_ALLOW_NESTING and TDB_DISALLOW_NESTING together sets
//     tdb->ecode to TDB_ERR_NESTING and logs a fatal message;
//   - removing TDB_ALLOW_NESTING sets TDB_DISALLOW_NESTING;
//   - removing TDB_DISALLOW_NESTING sets TDB_ALLOW_NESTING;
//   - finally every bit in flags is cleared from tdb->flags.
// NOTE(review): the log message has no trailing newline, unlike the other
// TDB_LOG messages in this file. The early return after the error is not
// visible in this listing.
752 void tdb_remove_flags(struct tdb_context *tdb, unsigned flags)
754 if ((flags & TDB_ALLOW_NESTING) &&
755 (flags & TDB_DISALLOW_NESTING)) {
756 tdb->ecode = TDB_ERR_NESTING;
757 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
758 "allow_nesting and disallow_nesting are not allowed together!"));
762 if (flags & TDB_ALLOW_NESTING) {
763 tdb->flags |= TDB_DISALLOW_NESTING;
765 if (flags & TDB_DISALLOW_NESTING) {
766 tdb->flags |= TDB_ALLOW_NESTING;
769 tdb->flags &= ~flags;
774 enable sequence number handling on an open tdb
// Sets TDB_SEQNUM in tdb->flags, the bit that tdb_increment_seqnum and
// tdb_increment_seqnum_nonblock test before touching the sequence number.
776 void tdb_enable_seqnum(struct tdb_context *tdb)
778 tdb->flags |= TDB_SEQNUM;
783 add a region of the file to the freelist. Length is the size of the region in bytes,
784 which includes the free list header that needs to be added
// Turns the file region [offset, offset + length) into one free record.
// Visible steps:
//   - regions no larger than a record header are skipped as not worth adding;
//   - a region reaching past tdb->map_size is logged as fatal;
//   - a zeroed record header is built with rec_len = length - sizeof(rec) and
//     handed to tdb_free at offset; a failure is logged as fatal.
// NOTE(review): length is ssize_t but is compared against sizeof(rec), which
// is unsigned, so a negative length converts to a large unsigned value and
// does not take the skip branch - confirm callers never pass a negative
// length. Return statements are not visible in this listing; tdb_wipe_all
// treats a non-zero result as failure.
786 static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t length)
788 struct tdb_record rec;
789 if (length <= sizeof(rec)) {
790 /* the region is not worth adding */
793 if (length + offset > tdb->map_size) {
794 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n"));
797 memset(&rec,'\0',sizeof(rec));
798 rec.rec_len = length - sizeof(rec);
799 if (tdb_free(tdb, offset, &rec) == -1) {
800 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: failed to add free record\n"));
807 wipe the entire database, deleting all records. This can be done
808 very fast by using an allrecord lock. The entire data portion of the
809 file becomes a single entry in the freelist.
811 This code carefully steps around the recovery area, leaving it alone
// Empties the database while holding the all-record lock.
// Visible steps:
//   - take the lock with tdb_lockall and trace the call;
//   - read TDB_RECOVERY_HEAD; when it is non-zero, read the recovery record
//     header and remember recovery_size = rec.rec_len + sizeof(rec);
//   - write offset 0 into every hash chain head and into FREELIST_TOP;
//   - without a recovery area, give everything from
//     TDB_DATA_START(hash_size) to map_size to tdb_free_region;
//   - with a recovery area, add one free region before it and one after it
//     so the recovery area itself is left untouched;
//   - release the lock with tdb_unlockall, logging a fatal message on failure.
// NOTE(review): the declarations of i and data_len, the else keyword, the
// error paths after each log call and the return statements are not visible
// in this listing. tdb_free_region takes a signed length, so the
// subtractions that produce data_len should be checked for negative results.
813 int tdb_wipe_all(struct tdb_context *tdb)
816 tdb_off_t offset = 0;
818 tdb_off_t recovery_head;
819 tdb_len_t recovery_size = 0;
821 if (tdb_lockall(tdb) != 0) {
825 tdb_trace(tdb, "tdb_wipe_all");
827 /* see if the tdb has a recovery area, and remember its size
828 if so. We don't want to lose this as otherwise each
829 tdb_wipe_all() in a transaction will increase the size of
830 the tdb by the size of the recovery area */
831 if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
832 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n"));
836 if (recovery_head != 0) {
837 struct tdb_record rec;
838 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
839 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n"));
842 recovery_size = rec.rec_len + sizeof(rec);
845 /* wipe the hashes */
846 for (i=0;i<tdb->header.hash_size;i++) {
847 if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
848 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i));
853 /* wipe the freelist */
854 if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
855 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n"));
859 /* add all the rest of the file to the freelist, possibly leaving a gap
860 for the recovery area */
861 if (recovery_size == 0) {
862 /* the simple case - the whole file can be used as a freelist */
863 data_len = (tdb->map_size - TDB_DATA_START(tdb->header.hash_size));
864 if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
868 /* we need to add two freelist entries - one on either
869 side of the recovery area
871 Note that we cannot shift the recovery area during
872 this operation. Only the transaction.c code may
873 move the recovery area or we risk subtle data
876 data_len = (recovery_head - TDB_DATA_START(tdb->header.hash_size));
877 if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
880 /* and the 2nd free list entry after the recovery area - if any */
881 data_len = tdb->map_size - (recovery_head+recovery_size);
882 if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
887 if (tdb_unlockall(tdb) != 0) {
888 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
// State handed to repack_traverse through its private_data argument.
// dest_db is the database that repack_traverse stores each record into.
// NOTE(review): other members and the closing brace are not visible in this
// listing.
899 struct traverse_state {
901 struct tdb_context *dest_db;
905 traverse function for repacking
// Traverse callback used by tdb_repack: stores each key/data pair into
// state->dest_db with tdb_store using TDB_INSERT. The tdb argument is not
// used by the visible statements.
// NOTE(review): the body of the failure branch and the return statements are
// not visible in this listing.
907 static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private_data)
909 struct traverse_state *state = (struct traverse_state *)private_data;
910 if (tdb_store(state->dest_db, key, data, TDB_INSERT) != 0) {
// Repacks the database by copying it out and back inside one transaction.
// Visible steps:
//   - trace the call and start a transaction on tdb;
//   - open a scratch database with tdb_open using TDB_INTERNAL and the same
//     hash size as tdb;
//   - copy every record into the scratch database with tdb_traverse_read and
//     repack_traverse;
//   - wipe tdb with tdb_wipe_all;
//   - traverse the scratch database with repack_traverse again to copy the
//     records back;
//   - commit the transaction.
// Each visible failure path logs a fatal message; the failures between
// tdb_open and the commit also cancel the transaction.
// NOTE(review): the conditions guarding the two "Error during ... traversal"
// branches, the switch of state.dest_db before the second traversal, the
// closing of tmp_db and the return statements are not visible in this
// listing.
920 int tdb_repack(struct tdb_context *tdb)
922 struct tdb_context *tmp_db;
923 struct traverse_state state;
925 tdb_trace(tdb, "tdb_repack");
927 if (tdb_transaction_start(tdb) != 0) {
928 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to start transaction\n"));
932 tmp_db = tdb_open("tmpdb", tdb_hash_size(tdb), TDB_INTERNAL, O_RDWR|O_CREAT, 0);
933 if (tmp_db == NULL) {
934 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to create tmp_db\n"));
935 tdb_transaction_cancel(tdb);
940 state.dest_db = tmp_db;
942 if (tdb_traverse_read(tdb, repack_traverse, &state) == -1) {
943 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying out\n"));
944 tdb_transaction_cancel(tdb);
950 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during traversal\n"));
951 tdb_transaction_cancel(tdb);
956 if (tdb_wipe_all(tdb) != 0) {
957 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to wipe database\n"));
958 tdb_transaction_cancel(tdb);
966 if (tdb_traverse_read(tmp_db, repack_traverse, &state) == -1) {
967 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying back\n"));
968 tdb_transaction_cancel(tdb);
974 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during second traversal\n"));
975 tdb_transaction_cancel(tdb);
982 if (tdb_transaction_commit(tdb) != 0) {
983 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to commit\n"));
// Writes the NUL terminated string str to tdb->tracefd with write.
// Any result other than strlen(str), which covers both errors and short
// writes, enters the if branch.
// NOTE(review): the body of that branch is not visible in this listing.
// strlen(str) is evaluated twice and the signed result of write is compared
// with an unsigned length.
991 static void tdb_trace_write(struct tdb_context *tdb, const char *str)
993 if (write(tdb->tracefd, str, strlen(str)) != strlen(str)) {
// Starts a trace line: reads the sequence number word at TDB_SEQNUM_OFS,
// formats it as an unsigned decimal followed by a space and writes it with
// tdb_trace_write. The result of tdb_ofs_read is not checked.
// msg is sized as sizeof(tdb_off_t) * 4 + 1 bytes and snprintf is bounded by
// sizeof(msg).
// NOTE(review): the declaration of seqnum is not visible in this listing.
999 static void tdb_trace_start(struct tdb_context *tdb)
1002 char msg[sizeof(tdb_off_t) * 4 + 1];
1004 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
1005 snprintf(msg, sizeof(msg), "%u ", seqnum);
1006 tdb_trace_write(tdb, msg);
// Ends a trace line by writing a single newline with tdb_trace_write.
1009 static void tdb_trace_end(struct tdb_context *tdb)
1011 tdb_trace_write(tdb, "\n");
// Ends a trace line with the integer result: formats ret as " = <ret>"
// followed by a newline and writes it with tdb_trace_write.
// msg is sized as sizeof(ret) * 4 + 4 bytes and snprintf is bounded by
// sizeof(msg).
1014 static void tdb_trace_end_ret(struct tdb_context *tdb, int ret)
1016 char msg[sizeof(ret) * 4 + 4];
1017 snprintf(msg, sizeof(msg), " = %i\n", ret);
1018 tdb_trace_write(tdb, msg);
// Writes one record to the trace as a length prefix followed by hex bytes.
// A record whose dptr is NULL is written as the word NULL so that it can be
// told apart from a zero length record.
// msg is a variable length array of 20 + rec.dsize * 2 bytes, so stack use
// grows with the record size.
// NOTE(review): the initialisation of p, the declaration of i and the return
// after the NULL case are not visible in this listing.
1021 static void tdb_trace_record(struct tdb_context *tdb, TDB_DATA rec)
1023 char msg[20 + rec.dsize*2], *p;
1026 /* We differentiate zero-length records from non-existent ones. */
1027 if (rec.dptr == NULL) {
1028 tdb_trace_write(tdb, " NULL");
1032 /* snprintf here is purely cargo-cult programming. */
1034 p += snprintf(p, sizeof(msg), " %zu:", rec.dsize);
// NOTE(review): with a size argument of 2, snprintf stores only one hex digit
// plus the terminating NUL, yet it returns 2 and p advances by 2. Each byte
// therefore leaves a NUL after its first digit, and the string handed to
// tdb_trace_write ends after the first hex digit of the first byte. A size
// of 3 would be needed to store both digits - confirm and fix.
1035 for (i = 0; i < rec.dsize; i++)
1036 p += snprintf(p, 2, "%02x", rec.dptr[i]);
1038 tdb_trace_write(tdb, msg);
// Traces an operation that has no arguments and no result: writes the
// sequence number prefix with tdb_trace_start, then the operation name op.
// NOTE(review): the statement that ends the trace line is not visible in
// this listing.
1041 void tdb_trace(struct tdb_context *tdb, const char *op)
1043 tdb_trace_start(tdb);
1044 tdb_trace_write(tdb, op);
// Traces an operation using a caller supplied sequence number instead of
// reading it from the file: formats seqnum as an unsigned decimal followed by
// a space, writes it, then writes the operation name op.
// NOTE(review): the statement that ends the trace line is not visible in
// this listing.
1048 void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op)
1050 char msg[sizeof(tdb_off_t) * 4 + 1];
1052 snprintf(msg, sizeof(msg), "%u ", seqnum);
1053 tdb_trace_write(tdb, msg);
1054 tdb_trace_write(tdb, op);
// Traces an open call: formats op, hash_size (decimal), tdb_flags and
// open_flags (hexadecimal) into msg, writes the sequence number prefix with
// tdb_trace_start and then writes msg.
// NOTE(review): the declaration of msg and the statement that ends the trace
// line are not visible in this listing.
1058 void tdb_trace_open(struct tdb_context *tdb, const char *op,
1059 unsigned hash_size, unsigned tdb_flags, unsigned open_flags)
1063 snprintf(msg, sizeof(msg),
1064 "%s %u 0x%x 0x%x", op, hash_size, tdb_flags, open_flags);
1065 tdb_trace_start(tdb);
1066 tdb_trace_write(tdb, msg);
// Traces an operation with an integer result: sequence number prefix,
// operation name op, then the result and newline via tdb_trace_end_ret.
1070 void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret)
1072 tdb_trace_start(tdb);
1073 tdb_trace_write(tdb, op);
1074 tdb_trace_end_ret(tdb, ret);
// Traces an operation whose result is a record: sequence number prefix,
// operation name op, the separator " =" and the record ret written by
// tdb_trace_record.
// NOTE(review): the statement that ends the trace line is not visible in
// this listing.
1077 void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret)
1079 tdb_trace_start(tdb);
1080 tdb_trace_write(tdb, op);
1081 tdb_trace_write(tdb, " =");
1082 tdb_trace_record(tdb, ret);
// Traces an operation with one record argument: sequence number prefix,
// operation name op, then the record rec written by tdb_trace_record.
// NOTE(review): the rest of the parameter list and the statement that ends
// the trace line are not visible in this listing.
1086 void tdb_trace_1rec(struct tdb_context *tdb, const char *op,
1089 tdb_trace_start(tdb);
1090 tdb_trace_write(tdb, op);
1091 tdb_trace_record(tdb, rec);
// Traces an operation with one record argument and an integer result:
// sequence number prefix, operation name op, the record rec, then the result
// and newline via tdb_trace_end_ret.
1095 void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op,
1096 TDB_DATA rec, int ret)
1098 tdb_trace_start(tdb);
1099 tdb_trace_write(tdb, op);
1100 tdb_trace_record(tdb, rec);
1101 tdb_trace_end_ret(tdb, ret);
// Traces an operation with one record argument and a record result:
// sequence number prefix, operation name op, the record rec, the separator
// " =" and the result record ret.
// NOTE(review): the statement that ends the trace line is not visible in
// this listing.
1104 void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op,
1105 TDB_DATA rec, TDB_DATA ret)
1107 tdb_trace_start(tdb);
1108 tdb_trace_write(tdb, op);
1109 tdb_trace_record(tdb, rec);
1110 tdb_trace_write(tdb, " =");
1111 tdb_trace_record(tdb, ret);
// Traces an operation with two record arguments, a flag and an integer
// result: sequence number prefix, operation name op, rec1, rec2, the flag
// formatted with %#x after a space, then the result and newline via
// tdb_trace_end_ret.
// msg is sized as 1 + sizeof(ret) * 4 bytes and snprintf is bounded by
// sizeof(msg).
// NOTE(review): the last line of the parameter list, which declares ret, is
// not visible in this listing.
1115 void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op,
1116 TDB_DATA rec1, TDB_DATA rec2, unsigned flag,
1119 char msg[1 + sizeof(ret) * 4];
1121 snprintf(msg, sizeof(msg), " %#x", flag);
1122 tdb_trace_start(tdb);
1123 tdb_trace_write(tdb, op);
1124 tdb_trace_record(tdb, rec1);
1125 tdb_trace_record(tdb, rec2);
1126 tdb_trace_write(tdb, msg);
1127 tdb_trace_end_ret(tdb, ret);
// Traces an operation with two record arguments and a record result:
// sequence number prefix, operation name op, rec1, rec2, the separator " ="
// and the result record ret.
// NOTE(review): the statement that ends the trace line is not visible in
// this listing.
1130 void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
1131 TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret)
1133 tdb_trace_start(tdb);
1134 tdb_trace_write(tdb, op);
1135 tdb_trace_record(tdb, rec1);
1136 tdb_trace_record(tdb, rec2);
1137 tdb_trace_write(tdb, " =");
1138 tdb_trace_record(tdb, ret);