/* when inside a transaction we need to keep track of any
nested tdb_transaction_start() calls, as these are allowed,
but don't create a new transaction */
- int nesting;
+ unsigned int nesting;
/* set when a prepare has already occurred */
bool prepared;
tdb_len_t old_map_size;
};
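+/* Fixed block size used to split transaction reads and writes into chunks
+ * and to size each buffered transaction block.  It doesn't really need to
+ * match the system page size; a page-like value is used for similar reasons.
+ * NOTE(review): the msync() offset rounding in this file also uses this
+ * constant; msync() expects an address aligned to the real system page
+ * size, so confirm this is safe where pages are larger than 4096 bytes. */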
+#define PAGESIZE 4096
/*
read while in a transaction. We need to check first if the data is in our list
enum TDB_ERROR ecode;
/* break it down into block sized ops */
- while (len + (off % getpagesize()) > getpagesize()) {
- tdb_len_t len2 = getpagesize() - (off % getpagesize());
+ while (len + (off % PAGESIZE) > PAGESIZE) {
+ tdb_len_t len2 = PAGESIZE - (off % PAGESIZE);
ecode = transaction_read(tdb, off, buf, len2);
if (ecode != TDB_SUCCESS) {
return ecode;
return TDB_SUCCESS;
}
- blk = off / getpagesize();
+ blk = off / PAGESIZE;
/* see if we have it in the block list */
if (tdb->transaction->num_blocks <= blk ||
}
/* now copy it out of this block */
- memcpy(buf, tdb->transaction->blocks[blk] + (off % getpagesize()), len);
+ memcpy(buf, tdb->transaction->blocks[blk] + (off % PAGESIZE), len);
return TDB_SUCCESS;
fail:
}
/* break it up into block sized chunks */
- while (len + (off % getpagesize()) > getpagesize()) {
- tdb_len_t len2 = getpagesize() - (off % getpagesize());
+ while (len + (off % PAGESIZE) > PAGESIZE) {
+ tdb_len_t len2 = PAGESIZE - (off % PAGESIZE);
ecode = transaction_write(tdb, off, buf, len2);
if (ecode != TDB_SUCCESS) {
return -1;
return TDB_SUCCESS;
}
- blk = off / getpagesize();
- off = off % getpagesize();
+ blk = off / PAGESIZE;
+ off = off % PAGESIZE;
if (tdb->transaction->num_blocks <= blk) {
uint8_t **new_blocks;
/* allocate and fill a block? */
if (tdb->transaction->blocks[blk] == NULL) {
- tdb->transaction->blocks[blk] = (uint8_t *)calloc(getpagesize(), 1);
+ tdb->transaction->blocks[blk] = (uint8_t *)calloc(PAGESIZE, 1);
if (tdb->transaction->blocks[blk] == NULL) {
ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
"transaction_write:"
" failed to allocate");
goto fail;
}
- if (tdb->transaction->old_map_size > blk * getpagesize()) {
- tdb_len_t len2 = getpagesize();
- if (len2 + (blk * getpagesize()) > tdb->transaction->old_map_size) {
- len2 = tdb->transaction->old_map_size - (blk * getpagesize());
+ if (tdb->transaction->old_map_size > blk * PAGESIZE) {
+ tdb_len_t len2 = PAGESIZE;
+ if (len2 + (blk * PAGESIZE) > tdb->transaction->old_map_size) {
+ len2 = tdb->transaction->old_map_size - (blk * PAGESIZE);
}
ecode = tdb->transaction->io_methods->tread(tdb,
- blk * getpagesize(),
+ blk * PAGESIZE,
tdb->transaction->blocks[blk],
len2);
if (ecode != TDB_SUCCESS) {
size_t blk;
/* break it up into block sized chunks */
- while (len + (off % getpagesize()) > getpagesize()) {
- tdb_len_t len2 = getpagesize() - (off % getpagesize());
+ while (len + (off % PAGESIZE) > PAGESIZE) {
+ tdb_len_t len2 = PAGESIZE - (off % PAGESIZE);
transaction_write_existing(tdb, off, buf, len2);
len -= len2;
off += len2;
return;
}
- blk = off / getpagesize();
- off = off % getpagesize();
+ blk = off / PAGESIZE;
+ off = off % PAGESIZE;
if (tdb->transaction->num_blocks <= blk ||
tdb->transaction->blocks[blk] == NULL) {
static void *transaction_direct(struct tdb_context *tdb, tdb_off_t off,
size_t len, bool write_mode)
{
- size_t blk = off / getpagesize(), end_blk;
+ size_t blk = off / PAGESIZE, end_blk;
/* This is wrong for zero-length blocks, but will fail gracefully */
- end_blk = (off + len - 1) / getpagesize();
+ end_blk = (off + len - 1) / PAGESIZE;
/* Can only do direct if in single block and we've already copied. */
if (write_mode) {
return NULL;
if (tdb->transaction->blocks[blk] == NULL)
return NULL;
- return tdb->transaction->blocks[blk] + off % getpagesize();
+ return tdb->transaction->blocks[blk] + off % PAGESIZE;
}
/* Single block which we have already copied? */
if (blk == end_blk
&& blk < tdb->transaction->num_blocks
&& tdb->transaction->blocks[blk])
- return tdb->transaction->blocks[blk] + off % getpagesize();
+ return tdb->transaction->blocks[blk] + off % PAGESIZE;
/* Otherwise must be all not copied. */
while (blk <= end_blk) {
}
#ifdef MS_SYNC
if (tdb->file->map_ptr) {
- tdb_off_t moffset = offset & ~(getpagesize()-1);
+ tdb_off_t moffset = offset & ~(PAGESIZE-1);
if (msync(moffset + (char *)tdb->file->map_ptr,
length + (offset - moffset), MS_SYNC) != 0) {
return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
/* cope with nested tdb_transaction_start() calls */
if (tdb->transaction != NULL) {
- return tdb->last_error = tdb_logerr(tdb, TDB_ERR_IO,
- TDB_LOG_USE_ERROR,
- "tdb_transaction_start:"
- " already inside"
- " transaction");
+ if (!(tdb->flags & TDB_ALLOW_NESTING)) {
+ return tdb->last_error
+ = tdb_logerr(tdb, TDB_ERR_IO,
+ TDB_LOG_USE_ERROR,
+ "tdb_transaction_start:"
+ " already inside transaction");
+ }
+ tdb->transaction->nesting++;
+ return 0;
}
if (tdb_has_hash_locks(tdb)) {
tdb_len_t recovery_size = 0;
int i;
- recovery_size = sizeof(tdb_len_t);
+ recovery_size = 0;
for (i=0;i<tdb->transaction->num_blocks;i++) {
- if (i * getpagesize() >= tdb->transaction->old_map_size) {
+ if (i * PAGESIZE >= tdb->transaction->old_map_size) {
break;
}
if (tdb->transaction->blocks[i] == NULL) {
if (i == tdb->transaction->num_blocks-1) {
recovery_size += tdb->transaction->last_block_size;
} else {
- recovery_size += getpagesize();
+ recovery_size += PAGESIZE;
}
}
us an area that is being currently used (as of the start of
the transaction) */
if (recovery_head != 0) {
- add_stat(tdb, frees, 1);
+ tdb->stats.frees++;
ecode = add_free_record(tdb, recovery_head,
- sizeof(rec) + rec.max_len);
+ sizeof(rec) + rec.max_len,
+ TDB_LOCK_WAIT, true);
if (ecode != TDB_SUCCESS) {
return tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
"tdb_recovery_allocate:"
/* the tdb_free() call might have increased the recovery size */
*recovery_size = tdb_recovery_size(tdb);
- /* round up to a multiple of page size */
+ /* round up to a multiple of page size. Overallocate, since each
+ * such allocation forces us to expand the file. */
*recovery_max_size
- = (((sizeof(rec) + *recovery_size) + getpagesize()-1)
- & ~(getpagesize()-1))
+ = (((sizeof(rec) + *recovery_size + *recovery_size / 2)
+ + PAGESIZE-1) & ~(PAGESIZE-1))
- sizeof(rec);
*recovery_offset = tdb->file->map_size;
recovery_head = *recovery_offset;
const struct tdb_methods *methods = tdb->transaction->io_methods;
struct tdb_recovery_record *rec;
tdb_off_t old_map_size = tdb->transaction->old_map_size;
- uint64_t magic, tailer;
+ uint64_t magic;
int i;
enum TDB_ERROR ecode;
continue;
}
- offset = i * getpagesize();
- length = getpagesize();
+ offset = i * PAGESIZE;
+ length = PAGESIZE;
if (i == tdb->transaction->num_blocks-1) {
length = tdb->transaction->last_block_size;
}
if (offset >= old_map_size) {
continue;
}
+
if (offset + length > tdb->file->map_size) {
free(data);
return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
/* the recovery area contains the old data, not the
new data, so we have to call the original tdb_read
method to get it */
- ecode = methods->tread(tdb, offset,
- p + sizeof(offset) + sizeof(length),
- length);
+ if (offset + length > old_map_size) {
+ /* Short read at EOF, and zero fill. */
+ unsigned int len = old_map_size - offset;
+ ecode = methods->tread(tdb, offset,
+ p + sizeof(offset) + sizeof(length),
+ len);
+ memset(p + sizeof(offset) + sizeof(length) + len, 0,
+ length - len);
+ } else {
+ ecode = methods->tread(tdb, offset,
+ p + sizeof(offset) + sizeof(length),
+ length);
+ }
if (ecode != TDB_SUCCESS) {
free(data);
return ecode;
p += sizeof(offset) + sizeof(length) + length;
}
- /* and the tailer */
- tailer = sizeof(*rec) + recovery_max_size;
- memcpy(p, &tailer, sizeof(tailer));
- tdb_convert(tdb, p, sizeof(tailer));
-
/* write the recovery data to the recovery area */
ecode = methods->twrite(tdb, recovery_offset, data,
sizeof(*rec) + recovery_size);
if (tdb->transaction->nesting != 0) {
- tdb->transaction->nesting--;
return TDB_SUCCESS;
}
/* upgrade the main transaction lock region to a write lock */
ecode = tdb_allrecord_upgrade(tdb);
if (ecode != TDB_SUCCESS) {
- tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
- "tdb_transaction_prepare_commit:"
- " failed to upgrade hash locks");
- _tdb_transaction_cancel(tdb);
return ecode;
}
during the commit */
ecode = tdb_lock_open(tdb, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
if (ecode != TDB_SUCCESS) {
- tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
- "tdb_transaction_prepare_commit:"
- " failed to get open lock");
- _tdb_transaction_cancel(tdb);
return ecode;
}
&tdb->transaction
->magic_offset);
if (ecode != TDB_SUCCESS) {
- tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
- "tdb_transaction_prepare_commit:"
- " failed to setup recovery data");
- _tdb_transaction_cancel(tdb);
return ecode;
}
}
tdb->file->map_size = tdb->transaction->old_map_size;
ecode = methods->expand_file(tdb, add);
if (ecode != TDB_SUCCESS) {
- tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
- "tdb_transaction_prepare_commit:"
- " expansion failed");
- _tdb_transaction_cancel(tdb);
return ecode;
}
}
if (!tdb->transaction->prepared) {
ecode = _tdb_transaction_prepare_commit(tdb);
- if (ecode != TDB_SUCCESS)
+ if (ecode != TDB_SUCCESS) {
+ _tdb_transaction_cancel(tdb);
return tdb->last_error = ecode;
+ }
}
methods = tdb->transaction->io_methods;
continue;
}
- offset = i * getpagesize();
- length = getpagesize();
+ offset = i * PAGESIZE;
+ length = PAGESIZE;
if (i == tdb->transaction->num_blocks-1) {
length = tdb->transaction->last_block_size;
}
ecode = methods->twrite(tdb, offset,
tdb->transaction->blocks[i], length);
if (ecode != TDB_SUCCESS) {
- tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
- "tdb_transaction_commit:"
- " write failed during commit");
-
/* we've overwritten part of the data and
possibly expanded the file, so we need to
run the crash recovery code */