still available, but no transaction recovery area is used and no
fsync/msync calls are made.
- - if TDB_NO_NESTING is passed to flags in tdb open then transaction
- nesting is disabled. tdb_transaction_start() will then implicitely
- cancel any pending transactions and always start a new transaction
- context instead of nesting.
+ - if TDB_ALLOW_NESTING is passed to flags in tdb open, or added using
+ tdb_add_flags() transaction is enabled.
+ The default is that transaction nesting is not allowed and an attempt
+ to create a nested transaction will fail with TDB_ERR_NESTING.
+ Beware. when transactions are nested a transaction successfully
+ completed with tdb_transaction_commit() can be silently unrolled later.
*/
but don't create a new transaction */
int nesting;
+ /* set when a prepare has already occurred */
+ bool prepared;
+ tdb_off_t magic_offset;
+
/* old file size before transaction */
tdb_len_t old_map_size;
+
+ /* we should re-pack on commit */
+ bool need_repack;
};
{
uint32_t blk;
+ /* Only a commit is allowed on a prepared transaction */
+ if (tdb->transaction->prepared) {
+ tdb->ecode = TDB_ERR_EINVAL;
+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_read: transaction already prepared, read not allowed\n"));
+ tdb->transaction->transaction_error = 1;
+ return -1;
+ }
+
/* break it down into block sized ops */
while (len + (off % tdb->transaction->block_size) > tdb->transaction->block_size) {
tdb_len_t len2 = tdb->transaction->block_size - (off % tdb->transaction->block_size);
{
uint32_t blk;
+ /* Only a commit is allowed on a prepared transaction */
+ if (tdb->transaction->prepared) {
+ tdb->ecode = TDB_ERR_EINVAL;
+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: transaction already prepared, write not allowed\n"));
+ tdb->transaction->transaction_error = 1;
+ return -1;
+ }
+
/* if the write is to a hash head, then update the transaction
hash heads */
if (len == sizeof(tdb_off_t) && off >= FREELIST_TOP &&
if (len <= tdb->map_size) {
return 0;
}
- return TDB_ERRCODE(TDB_ERR_IO, -1);
+ tdb->ecode = TDB_ERR_IO;
+ return -1;
}
/*
return -1;
}
+ tdb->transaction->need_repack = true;
+
return 0;
}
/*
brlock during a transaction - ignore them
*/
-static int transaction_brlock(struct tdb_context *tdb, tdb_off_t offset,
- int rw_type, int lck_type, int probe, size_t len)
+static int transaction_brlock(struct tdb_context *tdb,
+ int rw_type, tdb_off_t offset, size_t len,
+ enum tdb_lock_flags flags)
+{
+ return 0;
+}
+
+static int transaction_brunlock(struct tdb_context *tdb,
+ int rw_type, tdb_off_t offset, size_t len)
{
return 0;
}
transaction_next_hash_chain,
transaction_oob,
transaction_expand_file,
- transaction_brlock
+ transaction_brlock,
+ transaction_brunlock
};
+/*
+ sync to disk
+*/
+static int transaction_sync(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t length)
+{
+ if (tdb->flags & TDB_NOSYNC) {
+ return 0;
+ }
+
+ if (fsync(tdb->fd) != 0) {
+ tdb->ecode = TDB_ERR_IO;
+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction: fsync failed\n"));
+ return -1;
+ }
+#ifdef MS_SYNC
+ if (tdb->map_ptr) {
+ tdb_off_t moffset = offset & ~(tdb->page_size-1);
+ if (msync(moffset + (char *)tdb->map_ptr,
+ length + (offset - moffset), MS_SYNC) != 0) {
+ tdb->ecode = TDB_ERR_IO;
+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction: msync failed - %s\n",
+ strerror(errno)));
+ return -1;
+ }
+ }
+#endif
+ return 0;
+}
+
+/* ltype is F_WRLCK after prepare. */
+int _tdb_transaction_cancel(struct tdb_context *tdb, int ltype)
+{
+ int i, ret = 0;
+
+ if (tdb->transaction == NULL) {
+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_cancel: no transaction\n"));
+ return -1;
+ }
+
+ if (tdb->transaction->nesting != 0) {
+ tdb->transaction->transaction_error = 1;
+ tdb->transaction->nesting--;
+ return 0;
+ }
+
+ tdb->map_size = tdb->transaction->old_map_size;
+
+ /* free all the transaction blocks */
+ for (i=0;i<tdb->transaction->num_blocks;i++) {
+ if (tdb->transaction->blocks[i] != NULL) {
+ free(tdb->transaction->blocks[i]);
+ }
+ }
+ SAFE_FREE(tdb->transaction->blocks);
+
+ if (tdb->transaction->magic_offset) {
+ const struct tdb_methods *methods = tdb->transaction->io_methods;
+ uint32_t zero = 0;
+
+ /* remove the recovery marker */
+ if (methods->tdb_write(tdb, tdb->transaction->magic_offset, &zero, 4) == -1 ||
+ transaction_sync(tdb, tdb->transaction->magic_offset, 4) == -1) {
+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_cancel: failed to remove recovery magic\n"));
+ ret = -1;
+ }
+ }
+
+ /* remove any global lock created during the transaction */
+ if (tdb->global_lock.count != 0) {
+ tdb_brunlock(tdb, tdb->global_lock.ltype,
+ FREELIST_TOP, 4*tdb->header.hash_size);
+ tdb->global_lock.count = 0;
+ }
+
+ /* remove any locks created during the transaction */
+ if (tdb->num_locks != 0) {
+ for (i=0;i<tdb->num_lockrecs;i++) {
+ tdb_brunlock(tdb, tdb->lockrecs[i].ltype,
+ FREELIST_TOP+4*tdb->lockrecs[i].list, 1);
+ }
+ tdb->num_locks = 0;
+ tdb->num_lockrecs = 0;
+ SAFE_FREE(tdb->lockrecs);
+ }
+
+ /* restore the normal io methods */
+ tdb->methods = tdb->transaction->io_methods;
+
+ tdb_brunlock(tdb, ltype, FREELIST_TOP, 0);
+ tdb_transaction_unlock(tdb, F_WRLCK);
+ SAFE_FREE(tdb->transaction->hash_heads);
+ SAFE_FREE(tdb->transaction);
+
+ return ret;
+}
/*
start a tdb transaction. No token is returned, as only a single
/* cope with nested tdb_transaction_start() calls */
if (tdb->transaction != NULL) {
- if (!tdb->flags & TDB_NO_NESTING) {
- tdb->transaction->nesting++;
- TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_start: nesting %d\n",
- tdb->transaction->nesting));
- return 0;
- } else {
- tdb_transaction_cancel(tdb);
- TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_start: cancelling previous transaction\n"));
+ if (!(tdb->flags & TDB_ALLOW_NESTING)) {
+ tdb->ecode = TDB_ERR_NESTING;
+ return -1;
}
+ tdb_trace(tdb, "tdb_transaction_start");
+ tdb->transaction->nesting++;
+ TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_start: nesting %d\n",
+ tdb->transaction->nesting));
+ return 0;
}
if (tdb->num_locks != 0 || tdb->global_lock.count) {
/* get a read lock from the freelist to the end of file. This
is upgraded to a write lock during the commit */
- if (tdb_brlock(tdb, FREELIST_TOP, F_RDLCK, F_SETLKW, 0, 0) == -1) {
+ if (tdb_brlock(tdb, F_RDLCK, FREELIST_TOP, 0, TDB_LOCK_WAIT) == -1) {
TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: failed to get hash locks\n"));
tdb->ecode = TDB_ERR_LOCK;
goto fail;
tdb->transaction->io_methods = tdb->methods;
tdb->methods = &transaction_methods;
+ /* Trace at the end, so we get sequence number correct. */
+ tdb_trace(tdb, "tdb_transaction_start");
return 0;
fail:
- tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0);
- tdb_transaction_unlock(tdb);
+ tdb_brunlock(tdb, F_RDLCK, FREELIST_TOP, 0);
+ tdb_transaction_unlock(tdb, F_WRLCK);
SAFE_FREE(tdb->transaction->blocks);
SAFE_FREE(tdb->transaction->hash_heads);
SAFE_FREE(tdb->transaction);
cancel the current transaction
*/
int tdb_transaction_cancel(struct tdb_context *tdb)
-{
- int i;
-
- if (tdb->transaction == NULL) {
- TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_cancel: no transaction\n"));
- return -1;
- }
-
- if (tdb->transaction->nesting != 0) {
- tdb->transaction->transaction_error = 1;
- tdb->transaction->nesting--;
- return 0;
- }
-
- tdb->map_size = tdb->transaction->old_map_size;
-
- /* free all the transaction blocks */
- for (i=0;i<tdb->transaction->num_blocks;i++) {
- if (tdb->transaction->blocks[i] != NULL) {
- free(tdb->transaction->blocks[i]);
- }
- }
- SAFE_FREE(tdb->transaction->blocks);
-
- /* remove any global lock created during the transaction */
- if (tdb->global_lock.count != 0) {
- tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 4*tdb->header.hash_size);
- tdb->global_lock.count = 0;
- }
-
- /* remove any locks created during the transaction */
- if (tdb->num_locks != 0) {
- for (i=0;i<tdb->num_lockrecs;i++) {
- tdb_brlock(tdb,FREELIST_TOP+4*tdb->lockrecs[i].list,
- F_UNLCK,F_SETLKW, 0, 1);
- }
- tdb->num_locks = 0;
- tdb->num_lockrecs = 0;
- SAFE_FREE(tdb->lockrecs);
- }
-
- /* restore the normal io methods */
- tdb->methods = tdb->transaction->io_methods;
-
- tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0);
- tdb_transaction_unlock(tdb);
- SAFE_FREE(tdb->transaction->hash_heads);
- SAFE_FREE(tdb->transaction);
-
- return 0;
-}
-
-/*
- sync to disk
-*/
-static int transaction_sync(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t length)
-{
- if (fsync(tdb->fd) != 0) {
- tdb->ecode = TDB_ERR_IO;
- TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction: fsync failed\n"));
- return -1;
- }
-#ifdef MS_SYNC
- if (tdb->map_ptr) {
- tdb_off_t moffset = offset & ~(tdb->page_size-1);
- if (msync(moffset + (char *)tdb->map_ptr,
- length + (offset - moffset), MS_SYNC) != 0) {
- tdb->ecode = TDB_ERR_IO;
- TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction: msync failed - %s\n",
- strerror(errno)));
- return -1;
- }
- }
-#endif
- return 0;
+{
+ int ltype = F_RDLCK;
+ tdb_trace(tdb, "tdb_transaction_cancel");
+ if (tdb->transaction && tdb->transaction->prepared)
+ ltype = F_WRLCK;
+ return _tdb_transaction_cancel(tdb, ltype);
}
-
/*
work out how much space the linearised recovery data will consume
*/
return 0;
}
-/*
- commit the current transaction
-*/
-int tdb_transaction_commit(struct tdb_context *tdb)
+static int _tdb_transaction_prepare_commit(struct tdb_context *tdb)
{
const struct tdb_methods *methods;
- tdb_off_t magic_offset = 0;
- uint32_t zero = 0;
- int i;
if (tdb->transaction == NULL) {
- TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: no transaction\n"));
+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: no transaction\n"));
+ return -1;
+ }
+
+ if (tdb->transaction->prepared) {
+ tdb->ecode = TDB_ERR_EINVAL;
+ _tdb_transaction_cancel(tdb, F_WRLCK);
+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: transaction already prepared\n"));
return -1;
}
if (tdb->transaction->transaction_error) {
tdb->ecode = TDB_ERR_IO;
- tdb_transaction_cancel(tdb);
- TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: transaction error pending\n"));
+ _tdb_transaction_cancel(tdb, F_RDLCK);
+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: transaction error pending\n"));
return -1;
}
return 0;
}
+#ifdef TDB_TRACE
+ /* store seqnum now, before reading becomes illegal. */
+ tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &tdb->transaction_prepare_seqnum);
+#endif
+
/* check for a null transaction */
if (tdb->transaction->blocks == NULL) {
- tdb_transaction_cancel(tdb);
return 0;
}
nested their locks properly, so fail the transaction */
if (tdb->num_locks || tdb->global_lock.count) {
tdb->ecode = TDB_ERR_LOCK;
- TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: locks pending on commit\n"));
- tdb_transaction_cancel(tdb);
+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: locks pending on commit\n"));
+ _tdb_transaction_cancel(tdb, F_RDLCK);
return -1;
}
/* upgrade the main transaction lock region to a write lock */
if (tdb_brlock_upgrade(tdb, FREELIST_TOP, 0) == -1) {
- TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: failed to upgrade hash locks\n"));
+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: failed to upgrade hash locks\n"));
tdb->ecode = TDB_ERR_LOCK;
- tdb_transaction_cancel(tdb);
+ _tdb_transaction_cancel(tdb, F_RDLCK);
return -1;
}
/* get the global lock - this prevents new users attaching to the database
during the commit */
- if (tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) {
- TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: failed to get global lock\n"));
+ if (tdb_brlock(tdb, F_WRLCK, GLOBAL_LOCK, 1, TDB_LOCK_WAIT) == -1) {
+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: failed to get global lock\n"));
tdb->ecode = TDB_ERR_LOCK;
- tdb_transaction_cancel(tdb);
+ _tdb_transaction_cancel(tdb, F_WRLCK);
return -1;
}
if (!(tdb->flags & TDB_NOSYNC)) {
/* write the recovery data to the end of the file */
- if (transaction_setup_recovery(tdb, &magic_offset) == -1) {
- TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: failed to setup recovery data\n"));
- tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
- tdb_transaction_cancel(tdb);
+ if (transaction_setup_recovery(tdb, &tdb->transaction->magic_offset) == -1) {
+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_prepare_commit: failed to setup recovery data\n"));
+ tdb_brunlock(tdb, F_WRLCK, GLOBAL_LOCK, 1);
+ _tdb_transaction_cancel(tdb, F_WRLCK);
return -1;
}
}
+ tdb->transaction->prepared = true;
+
/* expand the file to the new size if needed */
if (tdb->map_size != tdb->transaction->old_map_size) {
if (methods->tdb_expand_file(tdb, tdb->transaction->old_map_size,
tdb->map_size -
tdb->transaction->old_map_size) == -1) {
tdb->ecode = TDB_ERR_IO;
- TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: expansion failed\n"));
- tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
- tdb_transaction_cancel(tdb);
+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_prepare_commit: expansion failed\n"));
+ tdb_brunlock(tdb, F_WRLCK, GLOBAL_LOCK, 1);
+ _tdb_transaction_cancel(tdb, F_WRLCK);
return -1;
}
tdb->map_size = tdb->transaction->old_map_size;
methods->tdb_oob(tdb, tdb->map_size + 1, 1);
}
+ /* Keep the global lock until the actual commit */
+
+ return 0;
+}
+
+/*
+ prepare to commit the current transaction
+*/
+int tdb_transaction_prepare_commit(struct tdb_context *tdb)
+{
+ tdb_trace(tdb, "tdb_transaction_prepare_commit");
+ return _tdb_transaction_prepare_commit(tdb);
+}
+
+/*
+ commit the current transaction
+*/
+int tdb_transaction_commit(struct tdb_context *tdb)
+{
+ const struct tdb_methods *methods;
+ int i;
+ bool need_repack;
+
+ if (tdb->transaction == NULL) {
+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: no transaction\n"));
+ return -1;
+ }
+
+ /* If we've prepared, can't read seqnum. */
+ if (tdb->transaction->prepared) {
+ tdb_trace_seqnum(tdb, tdb->transaction_prepare_seqnum,
+ "tdb_transaction_commit");
+ } else {
+ tdb_trace(tdb, "tdb_transaction_commit");
+ }
+
+ if (tdb->transaction->transaction_error) {
+ tdb->ecode = TDB_ERR_IO;
+ tdb_transaction_cancel(tdb);
+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: transaction error pending\n"));
+ return -1;
+ }
+
+
+ if (tdb->transaction->nesting != 0) {
+ tdb->transaction->nesting--;
+ return 0;
+ }
+
+ /* check for a null transaction */
+ if (tdb->transaction->blocks == NULL) {
+ _tdb_transaction_cancel(tdb, F_RDLCK);
+ return 0;
+ }
+
+ if (!tdb->transaction->prepared) {
+ int ret = _tdb_transaction_prepare_commit(tdb);
+ if (ret)
+ return ret;
+ }
+
+ methods = tdb->transaction->io_methods;
+
/* perform all the writes */
for (i=0;i<tdb->transaction->num_blocks;i++) {
tdb_off_t offset;
tdb->methods = methods;
tdb_transaction_recover(tdb);
- tdb_transaction_cancel(tdb);
- tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
+ _tdb_transaction_cancel(tdb, F_WRLCK);
+ tdb_brunlock(tdb, F_WRLCK, GLOBAL_LOCK, 1);
TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed\n"));
return -1;
SAFE_FREE(tdb->transaction->blocks);
tdb->transaction->num_blocks = 0;
- if (!(tdb->flags & TDB_NOSYNC)) {
- /* ensure the new data is on disk */
- if (transaction_sync(tdb, 0, tdb->map_size) == -1) {
- return -1;
- }
-
- /* remove the recovery marker */
- if (methods->tdb_write(tdb, magic_offset, &zero, 4) == -1) {
- TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: failed to remove recovery magic\n"));
- return -1;
- }
-
- /* ensure the recovery marker has been removed on disk */
- if (transaction_sync(tdb, magic_offset, 4) == -1) {
- return -1;
- }
+ /* ensure the new data is on disk */
+ if (transaction_sync(tdb, 0, tdb->map_size) == -1) {
+ return -1;
}
- tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
+ tdb_brunlock(tdb, F_WRLCK, GLOBAL_LOCK, 1);
/*
TODO: maybe write to some dummy hdr field, or write to magic
not be backed up (as tdb rounding to block sizes means that
file size changes are quite rare too). The following forces
mtime changes when a transaction completes */
-#ifdef HAVE_UTIME
+#if HAVE_UTIME
utime(tdb->name, NULL);
#endif
+ need_repack = tdb->transaction->need_repack;
+
/* use a transaction cancel to free memory and remove the
transaction locks */
- tdb_transaction_cancel(tdb);
+ _tdb_transaction_cancel(tdb, F_WRLCK);
+
+ if (need_repack) {
+ return tdb_repack(tdb);
+ }
return 0;
}