From: Rusty Russell Date: Sat, 25 Sep 2010 07:39:44 +0000 (+0930) Subject: tdb2: tdb_summary() implementation for tdb2. X-Git-Url: http://git.ozlabs.org/?p=ccan;a=commitdiff_plain;h=46d8f62015fb8bf6fe173573a55ea3f6324cee0a;ds=sidebyside tdb2: tdb_summary() implementation for tdb2. --- diff --git a/ccan/tdb2/_info b/ccan/tdb2/_info index ecfcd233..661fb7d2 100644 --- a/ccan/tdb2/_info +++ b/ccan/tdb2/_info @@ -78,6 +78,7 @@ int main(int argc, char *argv[]) printf("ccan/likely\n"); printf("ccan/asearch\n"); printf("ccan/build_assert\n"); + printf("ccan/tally\n"); return 0; } diff --git a/ccan/tdb2/summary.c b/ccan/tdb2/summary.c new file mode 100644 index 00000000..e7a29196 --- /dev/null +++ b/ccan/tdb2/summary.c @@ -0,0 +1,330 @@ + /* + Trivial Database 2: human-readable summary code + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ +#include "private.h" +#include +#include + +static void sizes_for_bucket(unsigned bucket, size_t *min, size_t *max) +{ + if (bucket <= 8) { + *min = *max = TDB_MIN_DATA_LEN + bucket * 8; + } else if (bucket == 9) { + /* FIXME: This is twisted; fix size_to_bucket. */ + *min = TDB_MIN_DATA_LEN + (1ULL << (bucket - 3)) + 8; + *max = TDB_MIN_DATA_LEN + (1ULL << (bucket - 2)) - 8; + } else { + *min = TDB_MIN_DATA_LEN + (1ULL << (bucket - 3)); + *max = TDB_MIN_DATA_LEN + (1ULL << (bucket - 2)) - 8; + } + assert(size_to_bucket(63, *min) == bucket); + assert(size_to_bucket(63, *max) == bucket); + if (bucket > 8) + assert(size_to_bucket(63, *min - 8) == bucket - 1); + assert(size_to_bucket(63, *max + 8) == bucket + 1); +} + +static int count_hash(struct tdb_context *tdb, + tdb_off_t hash_off, unsigned bits) +{ + const tdb_off_t *h; + unsigned int i, count = 0; + + h = tdb_access_read(tdb, hash_off, sizeof(*h) << bits, true); + if (!h) + return -1; + for (i = 0; i < (1 << bits); i++) + count += (h[i] != 0); + + tdb_access_release(tdb, h); + return count; +} + +static tdb_len_t summarize_zone(struct tdb_context *tdb, tdb_off_t zone_off, + struct tally *zones, + struct tally *hashes, + struct tally *free, + struct tally *keys, + struct tally *data, + struct tally *extra, + struct tally *uncoal, + uint64_t bucketlen[], + unsigned int *num_buckets) +{ + struct free_zone_header zhdr; + tdb_off_t off; + tdb_len_t len; + unsigned int hdrlen; + tdb_len_t unc = 0; + + if (tdb_read_convert(tdb, zone_off, &zhdr, sizeof(zhdr)) == -1) + return TDB_OFF_ERR; + + tally_add(zones, 1ULL << zhdr.zone_bits); + *num_buckets = BUCKETS_FOR_ZONE(zhdr.zone_bits); + + hdrlen = sizeof(zhdr) + + (BUCKETS_FOR_ZONE(zhdr.zone_bits) + 1) * sizeof(tdb_off_t); + for (off = zone_off + hdrlen; + off < zone_off + (1ULL << zhdr.zone_bits); + off += len) { + union { + struct tdb_used_record u; + struct tdb_free_record f; + } pad, *p; + p = tdb_get(tdb, off, &pad, sizeof(pad)); + if (!p) + return TDB_OFF_ERR; + if (frec_magic(&p->f) == TDB_FREE_MAGIC) { + len = p->f.data_len; + tally_add(free, len); + bucketlen[size_to_bucket(frec_zone_bits(&p->f), len)]++; + len += sizeof(p->u); + unc++; + } else { + if (unc) { + tally_add(uncoal, unc); + unc = 0; + } + len = sizeof(p->u) + + rec_key_length(&p->u) + + rec_data_length(&p->u) + + rec_extra_padding(&p->u); + + /* FIXME: Use different magic for hashes? */ + if (!rec_key_length(&p->u) && !rec_hash(&p->u)) { + int count = count_hash(tdb, off + sizeof(p->u), + TDB_SUBLEVEL_HASH_BITS); + if (count == -1) + return TDB_OFF_ERR; + tally_add(hashes, count); + } else { + tally_add(keys, rec_key_length(&p->u)); + tally_add(data, rec_data_length(&p->u)); + } + tally_add(extra, rec_extra_padding(&p->u)); + } + } + if (unc) + tally_add(uncoal, unc); + return 1ULL << zhdr.zone_bits; +} + +#define SUMMARY_FORMAT \ + "Size of file/data: %zu/%zu\n" \ + "Number of zones: %zu\n" \ + "Smallest/average/largest zone size: %zu/%zu/%zu\n%s" \ + "Number of records: %zu\n" \ + "Smallest/average/largest keys: %zu/%zu/%zu\n%s" \ + "Smallest/average/largest data: %zu/%zu/%zu\n%s" \ + "Smallest/average/largest padding: %zu/%zu/%zu\n%s" \ + "Number of free records: %zu\n" \ + "Smallest/average/largest free records: %zu/%zu/%zu\n%s" \ + "Number of uncoalesced records: %zu\n" \ + "Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n%s" \ + "Toplevel hash used: %u of %u\n" \ + "Number of subhashes: %zu\n" \ + "Smallest/average/largest subhash entries: %zu/%zu/%zu\n%s" \ + "Percentage keys/data/padding/free/rechdrs/zonehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n" + +#define BUCKET_SUMMARY_FORMAT_A \ + "Free bucket %zu: total entries %zu.\n" \ + "Smallest/average/largest length: %zu/%zu/%zu\n%s" +#define BUCKET_SUMMARY_FORMAT_B \ + "Free bucket %zu-%zu: total entries %zu.\n" \ + "Smallest/average/largest length: %zu/%zu/%zu\n%s" + +#define HISTO_WIDTH 70 +#define HISTO_HEIGHT 20 + +char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags) +{ + tdb_off_t off; + tdb_len_t len; + unsigned int i, num_buckets, max_bucket = 0; + uint64_t total_buckets = 0; + struct tally *zones, *hashes, *freet, *keys, *data, *extra, *uncoal, + *buckets[BUCKETS_FOR_ZONE(63)+1] = { NULL }; + char *zonesg, *hashesg, *freeg, *keysg, *datag, *extrag, *uncoalg, + *bucketsg[BUCKETS_FOR_ZONE(63)+1] = { NULL }; + char *ret = NULL; + + if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false) != 0) + return NULL; + + if (tdb_lock_expand(tdb, F_RDLCK) != 0) { + tdb_allrecord_unlock(tdb, F_RDLCK); + return NULL; + } + + /* Start stats off empty. */ + zones = tally_new(HISTO_HEIGHT); + hashes = tally_new(HISTO_HEIGHT); + freet = tally_new(HISTO_HEIGHT); + keys = tally_new(HISTO_HEIGHT); + data = tally_new(HISTO_HEIGHT); + extra = tally_new(HISTO_HEIGHT); + uncoal = tally_new(HISTO_HEIGHT); + if (!zones || !hashes || !freet || !keys || !data || !extra + || !uncoal) { + tdb->ecode = TDB_ERR_OOM; + goto unlock; + } + + for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) { + buckets[i] = tally_new(HISTO_HEIGHT); + if (!buckets[i]) { + tdb->ecode = TDB_ERR_OOM; + goto unlock; + } + } + + for (off = sizeof(struct tdb_header); + off < tdb->map_size - 1; + off += len) { + uint64_t bucketlen[BUCKETS_FOR_ZONE(63)+1] = { 0 }; + len = summarize_zone(tdb, off, zones, hashes, freet, keys, + data, extra, uncoal, bucketlen, + &num_buckets); + if (len == TDB_OFF_ERR) + goto unlock; + for (i = 0; i < num_buckets; i++) + tally_add(buckets[i], bucketlen[i]); + if (num_buckets > max_bucket) + max_bucket = num_buckets; + total_buckets += num_buckets; + } + + if (flags & TDB_SUMMARY_HISTOGRAMS) { + zonesg = tally_histogram(zones, HISTO_WIDTH, HISTO_HEIGHT); + hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT); + freeg = tally_histogram(freet, HISTO_WIDTH, HISTO_HEIGHT); + keysg = tally_histogram(keys, HISTO_WIDTH, HISTO_HEIGHT); + datag = tally_histogram(data, HISTO_WIDTH, HISTO_HEIGHT); + extrag = tally_histogram(extra, HISTO_WIDTH, HISTO_HEIGHT); + uncoalg = tally_histogram(uncoal, HISTO_WIDTH, HISTO_HEIGHT); + for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) { + bucketsg[i] = tally_histogram(buckets[i], + HISTO_WIDTH, + HISTO_HEIGHT); + } + } else { + zonesg = hashesg = freeg = keysg = datag = extrag = uncoalg + = NULL; + } + + /* 20 is max length of a %llu. */ + len = strlen(SUMMARY_FORMAT) + 33*20 + 1 + + (zonesg ? strlen(zonesg) : 0) + + (hashesg ? strlen(hashesg) : 0) + + (freeg ? strlen(freeg) : 0) + + (keysg ? strlen(keysg) : 0) + + (datag ? strlen(datag) : 0) + + (extrag ? strlen(extrag) : 0) + + (uncoalg ? strlen(uncoalg) : 0); + for (i = 0; i < max_bucket; i++) { + len += strlen(BUCKET_SUMMARY_FORMAT_B) + 6 * 20 + + (bucketsg[i] ? strlen(bucketsg[i]) : 0); + } + + ret = malloc(len); + if (!ret) + goto unlock; + + len = sprintf(ret, SUMMARY_FORMAT, + (size_t)tdb->map_size, + tally_num(keys) + tally_num(data), + tally_num(zones), + tally_min(zones), tally_mean(zones), tally_max(zones), + zonesg ? zonesg : "", + tally_num(keys), + tally_min(keys), tally_mean(keys), tally_max(keys), + keysg ? keysg : "", + tally_min(data), tally_mean(data), tally_max(data), + datag ? datag : "", + tally_min(extra), tally_mean(extra), tally_max(extra), + extrag ? extrag : "", + tally_num(freet), + tally_min(freet), tally_mean(freet), tally_max(freet), + freeg ? freeg : "", + tally_total(uncoal, NULL), + tally_min(uncoal), tally_mean(uncoal), tally_max(uncoal), + uncoalg ? uncoalg : "", + count_hash(tdb, offsetof(struct tdb_header, hashtable), + TDB_TOPLEVEL_HASH_BITS), + 1 << TDB_TOPLEVEL_HASH_BITS, + tally_num(hashes), + tally_min(hashes), tally_mean(hashes), tally_max(hashes), + hashesg ? hashesg : "", + tally_total(keys, NULL) * 100.0 / tdb->map_size, + tally_total(data, NULL) * 100.0 / tdb->map_size, + tally_total(extra, NULL) * 100.0 / tdb->map_size, + tally_total(freet, NULL) * 100.0 / tdb->map_size, + (tally_num(keys) + tally_num(freet) + tally_num(hashes)) + * sizeof(struct tdb_used_record) * 100.0 / tdb->map_size, + (tally_num(zones) * sizeof(struct free_zone_header) + + total_buckets * sizeof(tdb_off_t)) + * 100.0 / tdb->map_size, + (tally_num(hashes) + * (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) + + (sizeof(tdb_off_t) << TDB_TOPLEVEL_HASH_BITS)) + * 100.0 / tdb->map_size); + + for (i = 0; i < max_bucket; i++) { + size_t min, max; + sizes_for_bucket(i, &min, &max); + if (min == max) { + len += sprintf(ret + len, BUCKET_SUMMARY_FORMAT_A, + min, tally_total(buckets[i], NULL), + tally_min(buckets[i]), + tally_mean(buckets[i]), + tally_max(buckets[i]), + bucketsg[i] ? bucketsg[i] : ""); + } else { + len += sprintf(ret + len, BUCKET_SUMMARY_FORMAT_B, + min, max, tally_total(buckets[i], NULL), + tally_min(buckets[i]), + tally_mean(buckets[i]), + tally_max(buckets[i]), + bucketsg[i] ? bucketsg[i] : ""); + } + } + +unlock: + free(zonesg); + free(hashesg); + free(freeg); + free(keysg); + free(datag); + free(extrag); + free(uncoalg); + free(zones); + free(hashes); + free(freet); + free(keys); + free(data); + free(extra); + free(uncoal); + for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) { + free(buckets[i]); + free(bucketsg[i]); + } + + tdb_allrecord_unlock(tdb, F_RDLCK); + tdb_unlock_expand(tdb, F_RDLCK); + return ret; +} diff --git a/ccan/tdb2/tdb2.h b/ccan/tdb2/tdb2.h index f856827f..5e74bfe7 100644 --- a/ccan/tdb2/tdb2.h +++ b/ccan/tdb2/tdb2.h @@ -63,6 +63,9 @@ enum TDB_ERROR {TDB_SUCCESS=0, TDB_ERR_CORRUPT, TDB_ERR_IO, TDB_ERR_LOCK, TDB_ERR_OOM, TDB_ERR_EXISTS, TDB_ERR_NOEXIST, TDB_ERR_EINVAL, TDB_ERR_RDONLY, TDB_ERR_NESTING }; +/* flags for tdb_summary. Logical or to combine. */ +enum tdb_summary_flags { TDB_SUMMARY_HISTOGRAMS = 1 }; + /* debugging uses one of the following levels */ enum tdb_debug_level {TDB_DEBUG_FATAL = 0, TDB_DEBUG_ERROR, TDB_DEBUG_WARNING, TDB_DEBUG_TRACE}; @@ -143,6 +146,8 @@ int tdb_check(struct tdb_context *tdb, enum TDB_ERROR tdb_error(struct tdb_context *tdb); const char *tdb_errorstr(struct tdb_context *tdb); +char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags); + extern struct tdb_data tdb_null; #ifdef __cplusplus diff --git a/ccan/tdb2/test/run-summary.c b/ccan/tdb2/test/run-summary.c new file mode 100644 index 00000000..d6ccf2fa --- /dev/null +++ b/ccan/tdb2/test/run-summary.c @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "logging.h" + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + struct tdb_context *tdb; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + struct tdb_data key = { (unsigned char *)&j, sizeof(j) }; + struct tdb_data data = { (unsigned char *)&j, sizeof(j) }; + char *summary; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 2 * 7) + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-summary.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(tdb); + if (!tdb) + continue; + + /* Put some stuff in there. */ + for (j = 0; j < 500; j++) + if (tdb_store(tdb, key, data, TDB_REPLACE) != 0) + fail("Storing in tdb"); + + for (j = 0; + j <= TDB_SUMMARY_HISTOGRAMS; + j += TDB_SUMMARY_HISTOGRAMS) { + summary = tdb_summary(tdb, j); + ok1(strstr(summary, "Number of records: 500\n")); + ok1(strstr(summary, "Smallest/average/largest keys: 4/4/4\n")); + ok1(strstr(summary, "Smallest/average/largest data: 4/4/4\n")); + ok1(strstr(summary, "Free bucket 8")); + ok1(strstr(summary, "Free bucket 16")); + ok1(strstr(summary, "Free bucket 24")); + if (j == TDB_SUMMARY_HISTOGRAMS) + ok1(strstr(summary, "|") + && strstr(summary, "*")); + else + ok1(!strstr(summary, "|") + && !strstr(summary, "*")); + free(summary); + } + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} + +