This is good for deep debugging.
struct tdb_free_record pad, *r;
tdb_off_t end;
+ add_stat(tdb, alloc_coalesce_tried, 1);
end = off + sizeof(struct tdb_used_record) + data_len;
while (end < tdb->map_size) {
nb_off = bucket_off(flist_offset(tdb, flist), bucket);
/* We may be violating lock order here, so best effort. */
- if (tdb_lock_free_bucket(tdb, nb_off, TDB_LOCK_NOWAIT) == -1)
+ if (tdb_lock_free_bucket(tdb, nb_off, TDB_LOCK_NOWAIT) == -1) {
+ add_stat(tdb, alloc_coalesce_lockfail, 1);
break;
+ }
/* Now we have lock, re-check. */
r = tdb_get(tdb, end, &pad, sizeof(pad));
}
if (unlikely(frec_magic(r) != TDB_FREE_MAGIC)) {
+ add_stat(tdb, alloc_coalesce_race, 1);
tdb_unlock_free_bucket(tdb, nb_off);
break;
}
if (unlikely(frec_flist(r) != flist)
|| unlikely(size_to_bucket(frec_len(r)) != bucket)) {
+ add_stat(tdb, alloc_coalesce_race, 1);
tdb_unlock_free_bucket(tdb, nb_off);
break;
}
end += sizeof(struct tdb_used_record) + frec_len(r);
tdb_unlock_free_bucket(tdb, nb_off);
+ add_stat(tdb, alloc_coalesce_num_merged, 1);
}
/* Didn't find any adjacent free? */
if (tdb_access_commit(tdb, r) != 0)
goto err;
+ add_stat(tdb, alloc_coalesce_succeeded, 1);
tdb_unlock_free_bucket(tdb, b_off);
if (add_free_record(tdb, off, end - off) == -1)
double multiplier;
size_t size = adjust_size(keylen, datalen);
+ add_stat(tdb, allocs, 1);
again:
b_off = bucket_off(flist_off, bucket);
/* Bucket of leftover will be <= current bucket, so nested
* locking is allowed. */
if (leftover) {
+ add_stat(tdb, alloc_leftover, 1);
if (add_free_record(tdb,
best_off + sizeof(rec)
+ frec_len(&best) - leftover,
if (off == TDB_OFF_ERR)
return TDB_OFF_ERR;
if (off != 0) {
+ if (b == start_b)
+ add_stat(tdb, alloc_bucket_exact, 1);
+ if (b == TDB_FREE_BUCKETS - 1)
+ add_stat(tdb, alloc_bucket_max, 1);
/* Worked? Stay using this list. */
tdb->flist_off = flist_off;
tdb->flist = flist;
/* Hmm, try next list. */
flist_off = next_flist(tdb, flist_off);
flist++;
+
if (flist_off == 0) {
wrapped = true;
flist_off = first_flist(tdb);
/* We need to drop this lock before adding free record. */
tdb_unlock_expand(tdb, F_WRLCK);
+ add_stat(tdb, expands, 1);
return add_free_record(tdb, old_size, wanted);
}
if (subhash == TDB_OFF_ERR)
return -1;
+ add_stat(tdb, alloc_subhash, 1);
if (zero_out(tdb, subhash + sizeof(struct tdb_used_record),
sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) == -1)
return -1;
file isn't sparse, which would be very bad if we ran out of
disk. This must be done with write, not via mmap */
memset(buf, 0x43, sizeof(buf));
if (fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1)
return -1;
tdb->map_size += addition;
tdb_mmap(tdb);
return (char *)tdb->map_ptr + off;
}
+/* Add "val" to one counter in the user-supplied stats struct.  The
+ * caller may have built against an older, smaller version of
+ * struct tdb_attribute_stats, so only touch counters that lie inside
+ * the size the caller declared (tdb->stats->size).
+ * NOTE(review): only the counter's start address is range-checked;
+ * since every counter is a uint64_t this should imply the end is in
+ * range for any truthfully-reported size — confirm. */
+void add_stat_(struct tdb_context *tdb, uint64_t *stat, size_t val)
+{
+ if ((uintptr_t)stat < (uintptr_t)tdb->stats + tdb->stats->size)
+ *stat += val;
+}
+
static const struct tdb_methods io_methods = {
tdb_read,
tdb_write,
fl.l_len = len;
fl.l_pid = 0;
+ add_stat(tdb, lock_lowlevel, 1);
if (waitflag)
return fcntl(tdb->fd, F_SETLKW, &fl);
- else
+ else {
+ add_stat(tdb, lock_nonblock, 1);
return fcntl(tdb->fd, F_SETLK, &fl);
+ }
}
static int fcntl_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len)
if (tdb->flags & TDB_NOLOCK)
return 0;
+ add_stat(tdb, locks, 1);
+
new_lck = find_nestlock(tdb, offset);
if (new_lck) {
if (new_lck->ltype == F_RDLCK && ltype == F_WRLCK) {
return -1;
}
+ add_stat(tdb, locks, 1);
again:
/* Lock hashes, gradually. */
if (tdb_lock_gradual(tdb, ltype, flags, TDB_HASH_LOCK_START,
uint64_t num_lockrecs;
struct tdb_lock_type *lockrecs;
+ struct tdb_attribute_stats *stats;
+
/* Single list of all TDBs, to avoid multiple opens. */
struct tdb_context *next;
dev_t device;
int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
void *rec, size_t len);
+/* Bump stats->statname by "val".  Does nothing at runtime unless the
+ * tdb was opened with a TDB_ATTRIBUTE_STATS attribute; add_stat_()
+ * additionally range-checks the counter against stats->size, so
+ * counters unknown to an older caller are silently skipped. */
+void add_stat_(struct tdb_context *tdb, uint64_t *stat, size_t val);
+#define add_stat(tdb, statname, val) \
+ do { \
+ if (unlikely((tdb)->stats)) \
+ add_stat_((tdb), &(tdb)->stats->statname, (val)); \
+ } while (0)
/* lock.c: */
void tdb_lock_init(struct tdb_context *tdb);
tdb->log = null_log_fn;
tdb->log_priv = NULL;
tdb->transaction = NULL;
+ tdb->stats = NULL;
tdb_hash_init(tdb);
tdb_io_init(tdb);
tdb_lock_init(tdb);
case TDB_ATTRIBUTE_SEED:
seed = &attr->seed;
break;
+ case TDB_ATTRIBUTE_STATS:
+ tdb->stats = &attr->stats;
+ /* They have stats we don't know about? Tell them. */
+ if (tdb->stats->size > sizeof(attr->stats))
+ tdb->stats->size = sizeof(attr->stats);
+ break;
default:
tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
"tdb_open: unknown attribute type %u\n",
/* We didn't like the existing one: remove it. */
if (old_off) {
+ add_stat(tdb, frees, 1);
add_free_record(tdb, old_off,
sizeof(struct tdb_used_record)
+ key.dsize + old_room);
goto unlock_err;
/* Free the deleted entry. */
+ add_stat(tdb, frees, 1);
if (add_free_record(tdb, off,
sizeof(struct tdb_used_record)
+ rec_key_length(&rec)
enum tdb_attribute_type {
TDB_ATTRIBUTE_LOG = 0,
TDB_ATTRIBUTE_HASH = 1,
- TDB_ATTRIBUTE_SEED = 2
+ TDB_ATTRIBUTE_SEED = 2,
+ TDB_ATTRIBUTE_STATS = 3 /* union member: struct tdb_attribute_stats */
};
struct tdb_attribute_base {
uint64_t seed;
};
+struct tdb_attribute_stats {
+ struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_STATS */
+ size_t size; /* = sizeof(struct tdb_attribute_stats) */
+ uint64_t allocs; /* total space-allocation requests */
+ uint64_t alloc_subhash; /* ...of which: new subhash tables */
+ uint64_t alloc_bucket_exact; /* ...satisfied from first-choice bucket */
+ uint64_t alloc_bucket_max; /* ...satisfied from the largest bucket */
+ uint64_t alloc_leftover; /* ...with excess put back on a free list */
+ uint64_t alloc_coalesce_tried; /* attempts to merge adjacent free recs */
+ uint64_t alloc_coalesce_lockfail; /* ...abandoned: bucket lock unavailable */
+ uint64_t alloc_coalesce_race; /* ...abandoned: record changed under us */
+ uint64_t alloc_coalesce_succeeded; /* ...where the merge completed */
+ uint64_t alloc_coalesce_num_merged; /* total records merged in */
+ uint64_t expands; /* times the file was grown */
+ uint64_t frees; /* records handed back to a free list */
+ uint64_t locks; /* lock requests */
+ uint64_t lock_lowlevel; /* ...reaching the fcntl layer */
+ uint64_t lock_nonblock; /* ...of those, non-blocking (F_SETLK) */
+};
+
union tdb_attribute {
struct tdb_attribute_base base;
struct tdb_attribute_log log;
struct tdb_attribute_hash hash;
struct tdb_attribute_seed seed;
+ struct tdb_attribute_stats stats;
};
struct tdb_context *tdb_open(const char *name, int tdb_flags,
return 0;
}
+/* Print every counter to stdout (indented names are subtotals of the
+ * unindented counter above them), then zero everything from "allocs"
+ * to the end of the struct so the next benchmark stage starts from
+ * scratch.  base/size are deliberately left untouched by the clear. */
+static void dump_and_clear_stats(struct tdb_attribute_stats *stats)
+{
+ printf("allocs = %llu\n",
+ (unsigned long long)stats->allocs);
+ printf(" alloc_subhash = %llu\n",
+ (unsigned long long)stats->alloc_subhash);
+ printf(" alloc_bucket_exact = %llu\n",
+ (unsigned long long)stats->alloc_bucket_exact);
+ printf(" alloc_bucket_max = %llu\n",
+ (unsigned long long)stats->alloc_bucket_max);
+ printf(" alloc_leftover = %llu\n",
+ (unsigned long long)stats->alloc_leftover);
+ printf(" alloc_coalesce_tried = %llu\n",
+ (unsigned long long)stats->alloc_coalesce_tried);
+ printf(" alloc_coalesce_lockfail = %llu\n",
+ (unsigned long long)stats->alloc_coalesce_lockfail);
+ printf(" alloc_coalesce_race = %llu\n",
+ (unsigned long long)stats->alloc_coalesce_race);
+ printf(" alloc_coalesce_succeeded = %llu\n",
+ (unsigned long long)stats->alloc_coalesce_succeeded);
+ printf(" alloc_coalesce_num_merged = %llu\n",
+ (unsigned long long)stats->alloc_coalesce_num_merged);
+ printf("expands = %llu\n",
+ (unsigned long long)stats->expands);
+ printf("frees = %llu\n",
+ (unsigned long long)stats->frees);
+ printf("locks = %llu\n",
+ (unsigned long long)stats->locks);
+ printf(" lock_lowlevel = %llu\n",
+ (unsigned long long)stats->lock_lowlevel);
+ printf(" lock_nonblock = %llu\n",
+ (unsigned long long)stats->lock_nonblock);
+
+ /* Now clear: wipe allocs..end-of-struct, keeping base and size. */
+ memset(&stats->allocs, 0, (char *)(stats+1) - (char *)&stats->allocs);
+}
+
int main(int argc, char *argv[])
{
unsigned int i, j, num = 1000, stage = 0, stopat = -1;
TDB_DATA key, data;
struct tdb_context *tdb;
struct timeval start, stop;
- union tdb_attribute seed;
+ union tdb_attribute seed, stats;
/* Try to keep benchmarks even. */
seed.base.attr = TDB_ATTRIBUTE_SEED;
seed.base.next = NULL;
seed.seed.seed = 0;
+ memset(&stats, 0, sizeof(stats));
+ stats.base.attr = TDB_ATTRIBUTE_STATS;
+ stats.base.next = NULL;
+ stats.stats.size = sizeof(stats);
+
if (argv[1] && strcmp(argv[1], "--internal") == 0) {
flags = TDB_INTERNAL;
argc--;
argc--;
argv++;
}
+ if (argv[1] && strcmp(argv[1], "--stats") == 0) {
+ seed.base.next = &stats;
+ argc--;
+ argv++;
+ }
tdb = tdb_open("/tmp/speed.tdb", flags, O_RDWR|O_CREAT|O_TRUNC,
0600, &seed);
errx(1, "committing transaction: %s", tdb_errorstr(tdb));
printf(" %zu ns (%zu bytes)\n",
normalize(&start, &stop, num), file_size());
+
+ if (seed.base.next)
+ dump_and_clear_stats(&stats.stats);
if (++stage == stopat)
exit(0);
errx(1, "committing transaction: %s", tdb_errorstr(tdb));
printf(" %zu ns (%zu bytes)\n",
normalize(&start, &stop, num), file_size());
+ if (seed.base.next)
+ dump_and_clear_stats(&stats.stats);
if (++stage == stopat)
exit(0);
errx(1, "committing transaction: %s", tdb_errorstr(tdb));
printf(" %zu ns (%zu bytes)\n",
normalize(&start, &stop, num), file_size());
+ if (seed.base.next)
+ dump_and_clear_stats(&stats.stats);
if (++stage == stopat)
exit(0);
errx(1, "committing transaction: %s", tdb_errorstr(tdb));
printf(" %zu ns (%zu bytes)\n",
normalize(&start, &stop, num), file_size());
+ if (seed.base.next)
+ dump_and_clear_stats(&stats.stats);
if (++stage == stopat)
exit(0);
errx(1, "committing transaction: %s", tdb_errorstr(tdb));
printf(" %zu ns (%zu bytes)\n",
normalize(&start, &stop, num), file_size());
+ if (seed.base.next)
+ dump_and_clear_stats(&stats.stats);
if (++stage == stopat)
exit(0);
errx(1, "committing transaction: %s", tdb_errorstr(tdb));
printf(" %zu ns (%zu bytes)\n",
normalize(&start, &stop, num), file_size());
+ if (seed.base.next)
+ dump_and_clear_stats(&stats.stats);
if (++stage == stopat)
exit(0);
printf(" %zu ns (%zu bytes)\n",
normalize(&start, &stop, num), file_size());
+ if (seed.base.next)
+ dump_and_clear_stats(&stats.stats);
+ if (++stage == stopat)
+ exit(0);
+
return 0;
}
us an area that is being currently used (as of the start of
the transaction) */
if (recovery_head != 0) {
+ add_stat(tdb, frees, 1);
if (add_free_record(tdb, recovery_head,
sizeof(rec) + rec.max_len) != 0) {
tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,