tdb2: tdb_summary() implementation for tdb2.
authorRusty Russell <rusty@rustcorp.com.au>
Sat, 25 Sep 2010 07:39:44 +0000 (17:09 +0930)
committerRusty Russell <rusty@rustcorp.com.au>
Sat, 25 Sep 2010 07:39:44 +0000 (17:09 +0930)
ccan/tdb2/_info
ccan/tdb2/summary.c [new file with mode: 0644]
ccan/tdb2/tdb2.h
ccan/tdb2/test/run-summary.c [new file with mode: 0644]

index ecfcd233950e973e3cf07bcb370e44e00fb4bce8..661fb7d2db285bc8f9efba41da78d1f68933883c 100644 (file)
@@ -78,6 +78,7 @@ int main(int argc, char *argv[])
                printf("ccan/likely\n");
                printf("ccan/asearch\n");
                printf("ccan/build_assert\n");
+               printf("ccan/tally\n");
                return 0;
        }
 
diff --git a/ccan/tdb2/summary.c b/ccan/tdb2/summary.c
new file mode 100644 (file)
index 0000000..e7a2919
--- /dev/null
@@ -0,0 +1,330 @@
+ /* 
+   Trivial Database 2: human-readable summary code
+   Copyright (C) Rusty Russell 2010
+   
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 3 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "private.h"
+#include <assert.h>
+#include <ccan/tally/tally.h>
+
+static void sizes_for_bucket(unsigned bucket, size_t *min, size_t *max)
+{
+       if (bucket <= 8) {
+               *min = *max = TDB_MIN_DATA_LEN + bucket * 8;
+       } else if (bucket == 9) {
+               /* FIXME: This is twisted; fix size_to_bucket. */
+               *min = TDB_MIN_DATA_LEN + (1ULL << (bucket - 3)) + 8;
+               *max = TDB_MIN_DATA_LEN + (1ULL << (bucket - 2)) - 8;
+       } else {
+               *min = TDB_MIN_DATA_LEN + (1ULL << (bucket - 3));
+               *max = TDB_MIN_DATA_LEN + (1ULL << (bucket - 2)) - 8;
+       }
+       assert(size_to_bucket(63, *min) == bucket);
+       assert(size_to_bucket(63, *max) == bucket);
+       if (bucket > 8)
+               assert(size_to_bucket(63, *min - 8) == bucket - 1);
+       assert(size_to_bucket(63, *max + 8) == bucket + 1);
+}
+
+static int count_hash(struct tdb_context *tdb,
+                     tdb_off_t hash_off, unsigned bits)
+{
+       const tdb_off_t *h;
+       unsigned int i, count = 0;
+
+       h = tdb_access_read(tdb, hash_off, sizeof(*h) << bits, true);
+       if (!h)
+               return -1;
+       for (i = 0; i < (1 << bits); i++)
+               count += (h[i] != 0);
+
+       tdb_access_release(tdb, h);
+       return count;
+}
+
+static tdb_len_t summarize_zone(struct tdb_context *tdb, tdb_off_t zone_off,
+                               struct tally *zones,
+                               struct tally *hashes,
+                               struct tally *free,
+                               struct tally *keys,
+                               struct tally *data,
+                               struct tally *extra,
+                               struct tally *uncoal,
+                               uint64_t bucketlen[],
+                               unsigned int *num_buckets)
+{
+       struct free_zone_header zhdr;
+       tdb_off_t off;
+       tdb_len_t len;
+       unsigned int hdrlen;
+       tdb_len_t unc = 0;
+
+       if (tdb_read_convert(tdb, zone_off, &zhdr, sizeof(zhdr)) == -1)
+               return TDB_OFF_ERR;
+
+       tally_add(zones, 1ULL << zhdr.zone_bits);
+       *num_buckets = BUCKETS_FOR_ZONE(zhdr.zone_bits);
+
+       hdrlen = sizeof(zhdr)
+               + (BUCKETS_FOR_ZONE(zhdr.zone_bits) + 1) * sizeof(tdb_off_t);
+       for (off = zone_off + hdrlen;
+            off < zone_off + (1ULL << zhdr.zone_bits);
+            off += len) {
+               union {
+                       struct tdb_used_record u;
+                       struct tdb_free_record f;
+               } pad, *p;
+               p = tdb_get(tdb, off, &pad, sizeof(pad));
+               if (!p)
+                       return TDB_OFF_ERR;
+               if (frec_magic(&p->f) == TDB_FREE_MAGIC) {
+                       len = p->f.data_len;
+                       tally_add(free, len);
+                       bucketlen[size_to_bucket(frec_zone_bits(&p->f), len)]++;
+                       len += sizeof(p->u);
+                       unc++;
+               } else {
+                       if (unc) {
+                               tally_add(uncoal, unc);
+                               unc = 0;
+                       }
+                       len = sizeof(p->u)
+                               + rec_key_length(&p->u)
+                               + rec_data_length(&p->u)
+                               + rec_extra_padding(&p->u);
+
+                       /* FIXME: Use different magic for hashes? */
+                       if (!rec_key_length(&p->u) && !rec_hash(&p->u)) {
+                               int count = count_hash(tdb, off + sizeof(p->u),
+                                                      TDB_SUBLEVEL_HASH_BITS);
+                               if (count == -1)
+                                       return TDB_OFF_ERR;
+                               tally_add(hashes, count);
+                       } else {
+                               tally_add(keys, rec_key_length(&p->u));
+                               tally_add(data, rec_data_length(&p->u));
+                       }
+                       tally_add(extra, rec_extra_padding(&p->u));
+               }
+       }
+       if (unc)
+               tally_add(uncoal, unc);
+       return 1ULL << zhdr.zone_bits;
+}
+
+#define SUMMARY_FORMAT \
+       "Size of file/data: %zu/%zu\n" \
+       "Number of zones: %zu\n" \
+       "Smallest/average/largest zone size: %zu/%zu/%zu\n%s" \
+       "Number of records: %zu\n" \
+       "Smallest/average/largest keys: %zu/%zu/%zu\n%s" \
+       "Smallest/average/largest data: %zu/%zu/%zu\n%s" \
+       "Smallest/average/largest padding: %zu/%zu/%zu\n%s" \
+       "Number of free records: %zu\n" \
+       "Smallest/average/largest free records: %zu/%zu/%zu\n%s" \
+       "Number of uncoalesced records: %zu\n" \
+       "Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n%s" \
+       "Toplevel hash used: %u of %u\n" \
+       "Number of subhashes: %zu\n" \
+       "Smallest/average/largest subhash entries: %zu/%zu/%zu\n%s" \
+       "Percentage keys/data/padding/free/rechdrs/zonehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n"
+
+#define BUCKET_SUMMARY_FORMAT_A                                        \
+       "Free bucket %zu: total entries %zu.\n"                 \
+       "Smallest/average/largest length: %zu/%zu/%zu\n%s"
+#define BUCKET_SUMMARY_FORMAT_B                                        \
+       "Free bucket %zu-%zu: total entries %zu.\n"             \
+       "Smallest/average/largest length: %zu/%zu/%zu\n%s"
+
+#define HISTO_WIDTH 70
+#define HISTO_HEIGHT 20
+
+char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags)
+{
+       tdb_off_t off;
+       tdb_len_t len;
+       unsigned int i, num_buckets, max_bucket = 0;
+       uint64_t total_buckets = 0;
+       struct tally *zones, *hashes, *freet, *keys, *data, *extra, *uncoal,
+               *buckets[BUCKETS_FOR_ZONE(63)+1] = { NULL };
+       char *zonesg, *hashesg, *freeg, *keysg, *datag, *extrag, *uncoalg,
+               *bucketsg[BUCKETS_FOR_ZONE(63)+1] = { NULL };
+       char *ret = NULL;
+
+       if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false) != 0)
+               return NULL;
+
+       if (tdb_lock_expand(tdb, F_RDLCK) != 0) {
+               tdb_allrecord_unlock(tdb, F_RDLCK);
+               return NULL;
+       }
+
+       /* Start stats off empty. */
+       zones = tally_new(HISTO_HEIGHT);
+       hashes = tally_new(HISTO_HEIGHT);
+       freet = tally_new(HISTO_HEIGHT);
+       keys = tally_new(HISTO_HEIGHT);
+       data = tally_new(HISTO_HEIGHT);
+       extra = tally_new(HISTO_HEIGHT);
+       uncoal = tally_new(HISTO_HEIGHT);
+       if (!zones || !hashes || !freet || !keys || !data || !extra
+           || !uncoal) {
+               tdb->ecode = TDB_ERR_OOM;
+               goto unlock;
+       }
+
+       for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) {
+               buckets[i] = tally_new(HISTO_HEIGHT);
+               if (!buckets[i]) {
+                       tdb->ecode = TDB_ERR_OOM;
+                       goto unlock;
+               }
+       }
+
+       for (off = sizeof(struct tdb_header);
+            off < tdb->map_size - 1;
+            off += len) {
+               uint64_t bucketlen[BUCKETS_FOR_ZONE(63)+1] = { 0 };
+               len = summarize_zone(tdb, off, zones, hashes, freet, keys,
+                                    data, extra, uncoal, bucketlen,
+                                    &num_buckets);
+               if (len == TDB_OFF_ERR)
+                       goto unlock;
+               for (i = 0; i < num_buckets; i++)
+                       tally_add(buckets[i], bucketlen[i]);
+               if (num_buckets > max_bucket)
+                       max_bucket = num_buckets;
+               total_buckets += num_buckets;
+       }
+
+       if (flags & TDB_SUMMARY_HISTOGRAMS) {
+               zonesg = tally_histogram(zones, HISTO_WIDTH, HISTO_HEIGHT);
+               hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT);
+               freeg = tally_histogram(freet, HISTO_WIDTH, HISTO_HEIGHT);
+               keysg = tally_histogram(keys, HISTO_WIDTH, HISTO_HEIGHT);
+               datag = tally_histogram(data, HISTO_WIDTH, HISTO_HEIGHT);
+               extrag = tally_histogram(extra, HISTO_WIDTH, HISTO_HEIGHT);
+               uncoalg = tally_histogram(uncoal, HISTO_WIDTH, HISTO_HEIGHT);
+               for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) {
+                       bucketsg[i] = tally_histogram(buckets[i],
+                                                     HISTO_WIDTH,
+                                                     HISTO_HEIGHT);
+               }
+       } else {
+               zonesg = hashesg = freeg = keysg = datag = extrag = uncoalg
+                       = NULL;
+       }
+
+       /* 20 is max length of a %llu. */
+       len = strlen(SUMMARY_FORMAT) + 33*20 + 1
+               + (zonesg ? strlen(zonesg) : 0)
+               + (hashesg ? strlen(hashesg) : 0)
+               + (freeg ? strlen(freeg) : 0)
+               + (keysg ? strlen(keysg) : 0)
+               + (datag ? strlen(datag) : 0)
+               + (extrag ? strlen(extrag) : 0)
+               + (uncoalg ? strlen(uncoalg) : 0);
+       for (i = 0; i < max_bucket; i++) {
+               len += strlen(BUCKET_SUMMARY_FORMAT_B) + 6 * 20
+                       + (bucketsg[i] ? strlen(bucketsg[i]) : 0);
+       }
+
+       ret = malloc(len);
+       if (!ret)
+               goto unlock;
+
+       len = sprintf(ret, SUMMARY_FORMAT,
+                     (size_t)tdb->map_size,
+                     tally_num(keys) + tally_num(data),
+                     tally_num(zones),
+                     tally_min(zones), tally_mean(zones), tally_max(zones),
+                     zonesg ? zonesg : "",
+                     tally_num(keys),
+                     tally_min(keys), tally_mean(keys), tally_max(keys),
+                     keysg ? keysg : "",
+                     tally_min(data), tally_mean(data), tally_max(data),
+                     datag ? datag : "",
+                     tally_min(extra), tally_mean(extra), tally_max(extra),
+                     extrag ? extrag : "",
+                     tally_num(freet),
+                     tally_min(freet), tally_mean(freet), tally_max(freet),
+                     freeg ? freeg : "",
+                     tally_total(uncoal, NULL),
+                     tally_min(uncoal), tally_mean(uncoal), tally_max(uncoal),
+                     uncoalg ? uncoalg : "",
+                     count_hash(tdb, offsetof(struct tdb_header, hashtable),
+                                TDB_TOPLEVEL_HASH_BITS),
+                     1 << TDB_TOPLEVEL_HASH_BITS,
+                     tally_num(hashes),
+                     tally_min(hashes), tally_mean(hashes), tally_max(hashes),
+                     hashesg ? hashesg : "",
+                     tally_total(keys, NULL) * 100.0 / tdb->map_size,
+                     tally_total(data, NULL) * 100.0 / tdb->map_size,
+                     tally_total(extra, NULL) * 100.0 / tdb->map_size,
+                     tally_total(freet, NULL) * 100.0 / tdb->map_size,
+                     (tally_num(keys) + tally_num(freet) + tally_num(hashes))
+                     * sizeof(struct tdb_used_record) * 100.0 / tdb->map_size,
+                     (tally_num(zones) * sizeof(struct free_zone_header)
+                      + total_buckets * sizeof(tdb_off_t))
+                     * 100.0 / tdb->map_size,
+                     (tally_num(hashes)
+                      * (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
+                      + (sizeof(tdb_off_t) << TDB_TOPLEVEL_HASH_BITS))
+                     * 100.0 / tdb->map_size);
+
+       for (i = 0; i < max_bucket; i++) {
+               size_t min, max;
+               sizes_for_bucket(i, &min, &max);
+               if (min == max) {
+                       len += sprintf(ret + len, BUCKET_SUMMARY_FORMAT_A,
+                                      min, tally_total(buckets[i], NULL),
+                                      tally_min(buckets[i]),
+                                      tally_mean(buckets[i]),
+                                      tally_max(buckets[i]),
+                                      bucketsg[i] ? bucketsg[i] : "");
+               } else {
+                       len += sprintf(ret + len, BUCKET_SUMMARY_FORMAT_B,
+                                      min, max, tally_total(buckets[i], NULL),
+                                      tally_min(buckets[i]),
+                                      tally_mean(buckets[i]),
+                                      tally_max(buckets[i]),
+                                      bucketsg[i] ? bucketsg[i] : "");
+               }
+       }
+
+unlock:
+       free(zonesg);
+       free(hashesg);
+       free(freeg);
+       free(keysg);
+       free(datag);
+       free(extrag);
+       free(uncoalg);
+       free(zones);
+       free(hashes);
+       free(freet);
+       free(keys);
+       free(data);
+       free(extra);
+       free(uncoal);
+       for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) {
+               free(buckets[i]);
+               free(bucketsg[i]);
+       }
+
+       tdb_allrecord_unlock(tdb, F_RDLCK);
+       tdb_unlock_expand(tdb, F_RDLCK);
+       return ret;
+}
index f856827fcc8f9997598665643b5a0fb44417cec7..5e74bfe7883e48e7f1e52e1b5d1696d1ebc433fe 100644 (file)
@@ -63,6 +63,9 @@ enum TDB_ERROR {TDB_SUCCESS=0, TDB_ERR_CORRUPT, TDB_ERR_IO, TDB_ERR_LOCK,
                TDB_ERR_OOM, TDB_ERR_EXISTS, TDB_ERR_NOEXIST,
                TDB_ERR_EINVAL, TDB_ERR_RDONLY, TDB_ERR_NESTING };
 
+/* flags for tdb_summary. Logical or to combine. */
+enum tdb_summary_flags { TDB_SUMMARY_HISTOGRAMS = 1 };
+
 /* debugging uses one of the following levels */
 enum tdb_debug_level {TDB_DEBUG_FATAL = 0, TDB_DEBUG_ERROR, 
                      TDB_DEBUG_WARNING, TDB_DEBUG_TRACE};
@@ -143,6 +146,8 @@ int tdb_check(struct tdb_context *tdb,
 enum TDB_ERROR tdb_error(struct tdb_context *tdb);
 const char *tdb_errorstr(struct tdb_context *tdb);
 
+char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags);
+
 extern struct tdb_data tdb_null;
 
 #ifdef  __cplusplus
diff --git a/ccan/tdb2/test/run-summary.c b/ccan/tdb2/test/run-summary.c
new file mode 100644 (file)
index 0000000..d6ccf2f
--- /dev/null
@@ -0,0 +1,60 @@
+#include <ccan/tdb2/tdb.c>
+#include <ccan/tdb2/free.c>
+#include <ccan/tdb2/lock.c>
+#include <ccan/tdb2/io.c>
+#include <ccan/tdb2/hash.c>
+#include <ccan/tdb2/check.c>
+#include <ccan/tdb2/summary.c>
+#include <ccan/tap/tap.h>
+#include "logging.h"
+
+int main(int argc, char *argv[])
+{
+       unsigned int i, j;
+       struct tdb_context *tdb;
+       int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
+                       TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
+                       TDB_NOMMAP|TDB_CONVERT };
+       struct tdb_data key = { (unsigned char *)&j, sizeof(j) };
+       struct tdb_data data = { (unsigned char *)&j, sizeof(j) };
+       char *summary;
+
+       plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 2 * 7) + 1);
+       for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+               tdb = tdb_open("run-summary.tdb", flags[i],
+                              O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+               ok1(tdb);
+               if (!tdb)
+                       continue;
+
+               /* Put some stuff in there. */
+               for (j = 0; j < 500; j++)
+                       if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
+                               fail("Storing in tdb");
+
+               for (j = 0;
+                    j <= TDB_SUMMARY_HISTOGRAMS;
+                    j += TDB_SUMMARY_HISTOGRAMS) {
+                       summary = tdb_summary(tdb, j);
+                       ok1(strstr(summary, "Number of records: 500\n"));
+                       ok1(strstr(summary, "Smallest/average/largest keys: 4/4/4\n"));
+                       ok1(strstr(summary, "Smallest/average/largest data: 4/4/4\n"));
+                       ok1(strstr(summary, "Free bucket 8"));
+                       ok1(strstr(summary, "Free bucket 16"));
+                       ok1(strstr(summary, "Free bucket 24"));
+                       if (j == TDB_SUMMARY_HISTOGRAMS)
+                               ok1(strstr(summary, "|")
+                                   && strstr(summary, "*"));
+                       else
+                               ok1(!strstr(summary, "|")
+                                   && !strstr(summary, "*"));
+                       free(summary);
+               }
+               tdb_close(tdb);
+       }
+
+       ok1(tap_log_messages == 0);
+       return exit_status();
+}
+
+