2 Trivial Database 2: human-readable summary code
3 Copyright (C) Rusty Russell 2010
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 3 of the License, or (at your option) any later version.
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include <ccan/tally/tally.h>
/*
 * Work out the smallest (*min) and largest (*max) length that belong to free
 * bucket `bucket`, then cross-check the answer against size_to_bucket().
 *
 * NOTE(review): this listing appears to omit some lines of the function
 * (for example the `if` that opens the first branch), so the bucket
 * threshold selecting the first case cannot be confirmed from here.
 */
22 static void sizes_for_bucket(unsigned bucket, size_t *min, size_t *max)
/* First case: min and max coincide; the length steps by 8 per bucket. */
25 *min = *max = TDB_MIN_DATA_LEN + bucket * 8;
26 } else if (bucket == 9) {
27 /* FIXME: This is twisted; fix size_to_bucket. */
/* Bucket 9 begins 8 bytes above 1 << (bucket - 3), unlike the case below. */
28 *min = TDB_MIN_DATA_LEN + (1ULL << (bucket - 3)) + 8;
29 *max = TDB_MIN_DATA_LEN + (1ULL << (bucket - 2)) - 8;
/* Remaining case: range runs from 1 << (bucket - 3) up to 8 short of
 * 1 << (bucket - 2), both offset by TDB_MIN_DATA_LEN. */
31 *min = TDB_MIN_DATA_LEN + (1ULL << (bucket - 3));
32 *max = TDB_MIN_DATA_LEN + (1ULL << (bucket - 2)) - 8;
/* Sanity checks: both ends must map back to this bucket... */
34 assert(size_to_bucket(63, *min) == bucket);
35 assert(size_to_bucket(63, *max) == bucket);
/* ...and the lengths 8 bytes outside the range must map to the neighbouring
 * buckets.  NOTE(review): any guard around the `*min - 8` check (e.g. for
 * the lowest bucket) is not visible in this listing. */
37 assert(size_to_bucket(63, *min - 8) == bucket - 1);
38 assert(size_to_bucket(63, *max + 8) == bucket + 1);
/*
 * Inspect the hash table stored at hash_off, which holds (1 << bits)
 * entries of sizeof(*h) bytes each, and return an int derived from it.
 *
 * NOTE(review): the loop body, the declaration of h and the return
 * statements are not visible in this listing; presumably the function
 * counts the entries in use and reports a read failure to the caller.
 * Confirm against the full source.
 */
41 static int count_hash(struct tdb_context *tdb,
42 tdb_off_t hash_off, unsigned bits)
45 unsigned int i, count = 0;
/* Obtain read access to the whole table in one go. */
47 h = tdb_access_read(tdb, hash_off, sizeof(*h) << bits, true);
/* Visit every slot.  NOTE(review): `1 << bits` has type int while i is
 * unsigned, so the comparison mixes signedness. */
50 for (i = 0; i < (1 << bits); i++)
/* Hand the table back once the scan is complete. */
53 tdb_access_release(tdb, h);
/*
 * Gather statistics for the single zone that starts at zone_off.
 *
 * The zone header is read first; the zone size (1 << zone_bits) is added to
 * the `zones` tally and the zone's bucket count is stored in *num_buckets.
 * The records that follow the header are then walked one by one and fed
 * into the remaining tallies and into bucketlen[].
 *
 * Returns the zone size in bytes (1ULL << zone_bits) on the path shown at
 * the end of the function.
 *
 * NOTE(review): most of the parameter list, the local declarations and
 * several branch bodies are not visible in this listing; the caller tests
 * the result against TDB_OFF_ERR, so the hidden error paths presumably
 * return that value.
 */
57 static tdb_len_t summarize_zone(struct tdb_context *tdb, tdb_off_t zone_off,
66 unsigned int *num_buckets)
68 struct free_zone_header zhdr;
/* Fetch the zone header; -1 signals a read failure. */
74 if (tdb_read_convert(tdb, zone_off, &zhdr, sizeof(zhdr)) == -1)
/* Record this zone's size and tell the caller how many buckets it has. */
77 tally_add(zones, 1ULL << zhdr.zone_bits);
78 *num_buckets = BUCKETS_FOR_ZONE(zhdr.zone_bits);
/* Tail of an expression whose first line is not shown; presumably the
 * header length (hdrlen), which includes one tdb_off_t per bucket plus one. */
81 + (BUCKETS_FOR_ZONE(zhdr.zone_bits) + 1) * sizeof(tdb_off_t);
/* The zone nominally ends 1 << zone_bits bytes after its start. */
83 end = zone_off + (1ULL << zhdr.zone_bits);
/* The final zone may extend past the mapped file; the statement guarded
 * by this test is not shown (presumably it clamps end to map_size). */
84 if (end > tdb->map_size)
/* Walk every record between the zone header and the end of the zone. */
87 for (off = zone_off + hdrlen; off < end; off += len) {
/* p is accessed both as a used record (p->u) and as a free record (p->f). */
89 struct tdb_used_record u;
90 struct tdb_free_record f;
92 p = tdb_get(tdb, off, &pad, sizeof(pad));
/* A record without TDB_MAGIC is handled as a free record. */
95 if (rec_magic(&p->u) != TDB_MAGIC) {
/* Count this free record in the bucket matching its length. */
98 bucketlen[size_to_bucket(frec_zone_bits(&p->f), len)]++;
/* Record the value of unc in the uncoal tally; how unc is maintained is
 * not visible here. */
103 tally_add(uncoal, unc);
/* Tail of the used-record length: key + data + extra padding (the first
 * term of the sum is not shown). */
107 + rec_key_length(&p->u)
108 + rec_data_length(&p->u)
109 + rec_extra_padding(&p->u);
111 /* FIXME: Use different magic for hashes? */
/* A used record with zero key length and zero hash is treated as a
 * sub-level hash table: summarise it with count_hash(). */
112 if (!rec_key_length(&p->u) && !rec_hash(&p->u)) {
113 int count = count_hash(tdb, off + sizeof(p->u),
114 TDB_SUBLEVEL_HASH_BITS);
117 tally_add(hashes, count);
/* Otherwise it is an ordinary record: tally its key and data sizes. */
119 tally_add(keys, rec_key_length(&p->u));
120 tally_add(data, rec_data_length(&p->u));
/* Extra padding is tallied for the used record. */
122 tally_add(extra, rec_extra_padding(&p->u));
/* After the walk, unc is tallied once more (its guard is not shown). */
126 tally_add(uncoal, unc);
/* Report the zone size so the caller can step to the next zone. */
127 return 1ULL << zhdr.zone_bits;
/*
 * printf-style template for the main body of the summary text.  Each "%s"
 * that follows a smallest/average/largest line is filled by tdb_summary()
 * with the matching histogram string, or with "" when there is none.
 */
130 #define SUMMARY_FORMAT \
131 "Size of file/data: %zu/%zu\n" \
132 "Number of zones: %zu\n" \
133 "Smallest/average/largest zone size: %zu/%zu/%zu\n%s" \
134 "Number of records: %zu\n" \
135 "Smallest/average/largest keys: %zu/%zu/%zu\n%s" \
136 "Smallest/average/largest data: %zu/%zu/%zu\n%s" \
137 "Smallest/average/largest padding: %zu/%zu/%zu\n%s" \
138 "Number of free records: %zu\n" \
139 "Smallest/average/largest free records: %zu/%zu/%zu\n%s" \
140 "Number of uncoalesced records: %zu\n" \
141 "Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n%s" \
142 "Toplevel hash used: %u of %u\n" \
143 "Number of subhashes: %zu\n" \
144 "Smallest/average/largest subhash entries: %zu/%zu/%zu\n%s" \
145 "Percentage keys/data/padding/free/rechdrs/zonehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n"
/* Per-bucket template used when a single size is printed for the bucket. */
147 #define BUCKET_SUMMARY_FORMAT_A \
148 "Free bucket %zu: total entries %zu.\n" \
149 "Smallest/average/largest length: %zu/%zu/%zu\n%s"
/* Per-bucket template used when a min-max size range is printed.  It is
 * also the template whose length is used when sizing the output buffer. */
150 #define BUCKET_SUMMARY_FORMAT_B \
151 "Free bucket %zu-%zu: total entries %zu.\n" \
152 "Smallest/average/largest length: %zu/%zu/%zu\n%s"
/* Dimensions passed to tally_histogram(); HISTO_HEIGHT is also the argument
 * given to tally_new() for every tally. */
154 #define HISTO_WIDTH 70
155 #define HISTO_HEIGHT 20
/*
 * Build a human-readable text summary of the database.
 *
 * @tdb:   the database context to summarise.
 * @flags: when TDB_SUMMARY_HISTOGRAMS is set, a histogram is generated for
 *         each statistic and embedded in the text.
 *
 * Outline of what the visible code does:
 *   1. take the all-record read lock and the expand read lock;
 *   2. create one tally per statistic plus one per possible free bucket;
 *   3. walk the file zone by zone with summarize_zone();
 *   4. optionally render histograms;
 *   5. estimate the output length and format the text with sprintf();
 *   6. release the locks.
 *
 * NOTE(review): the allocation of `ret`, the error/cleanup paths and the
 * final return are not visible in this listing; presumably the formatted
 * string is returned to the caller, who would own it - confirm against the
 * full source.
 */
157 char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags)
161 unsigned int i, num_buckets, max_bucket = 0;
162 uint64_t total_buckets = 0;
/* One tally and one histogram string per statistic; the arrays have one
 * slot per bucket of the largest zone size (63 bits), plus one. */
163 struct tally *zones, *hashes, *freet, *keys, *data, *extra, *uncoal,
164 *buckets[BUCKETS_FOR_ZONE(63)+1] = { NULL };
165 char *zonesg, *hashesg, *freeg, *keysg, *datag, *extrag, *uncoalg,
166 *bucketsg[BUCKETS_FOR_ZONE(63)+1] = { NULL };
/* Histogram strings stay NULL unless histograms are requested below. */
169 zonesg = hashesg = freeg = keysg = datag = extrag = uncoalg = NULL;
/* Read-lock all records, waiting if necessary. */
171 if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false) != 0)
/* Also take the expand lock for reading; if that fails, drop the
 * all-record lock again. */
174 if (tdb_lock_expand(tdb, F_RDLCK) != 0) {
175 tdb_allrecord_unlock(tdb, F_RDLCK);
179 /* Start stats off empty. */
180 zones = tally_new(HISTO_HEIGHT);
181 hashes = tally_new(HISTO_HEIGHT);
182 freet = tally_new(HISTO_HEIGHT);
183 keys = tally_new(HISTO_HEIGHT);
184 data = tally_new(HISTO_HEIGHT);
185 extra = tally_new(HISTO_HEIGHT);
186 uncoal = tally_new(HISTO_HEIGHT);
/* Any failed tally allocation is reported as out-of-memory. */
187 if (!zones || !hashes || !freet || !keys || !data || !extra
189 tdb->ecode = TDB_ERR_OOM;
/* Create a tally for every possible bucket index. */
193 for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) {
194 buckets[i] = tally_new(HISTO_HEIGHT);
196 tdb->ecode = TDB_ERR_OOM;
/* Walk the zones, starting immediately after the file header. */
201 for (off = sizeof(struct tdb_header);
202 off < tdb->map_size - 1;
/* Per-zone count of free records in each bucket, reset for every zone. */
204 uint64_t bucketlen[BUCKETS_FOR_ZONE(63)+1] = { 0 };
205 len = summarize_zone(tdb, off, zones, hashes, freet, keys,
206 data, extra, uncoal, bucketlen,
/* TDB_OFF_ERR from summarize_zone() indicates failure. */
208 if (len == TDB_OFF_ERR)
/* Fold this zone's per-bucket counts into the per-bucket tallies. */
210 for (i = 0; i < num_buckets; i++)
211 tally_add(buckets[i], bucketlen[i]);
/* Remember the largest bucket count of any zone, and the grand total. */
212 if (num_buckets > max_bucket)
213 max_bucket = num_buckets;
214 total_buckets += num_buckets;
/* Histograms are rendered only on request. */
217 if (flags & TDB_SUMMARY_HISTOGRAMS) {
218 zonesg = tally_histogram(zones, HISTO_WIDTH, HISTO_HEIGHT);
219 hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT);
220 freeg = tally_histogram(freet, HISTO_WIDTH, HISTO_HEIGHT);
221 keysg = tally_histogram(keys, HISTO_WIDTH, HISTO_HEIGHT);
222 datag = tally_histogram(data, HISTO_WIDTH, HISTO_HEIGHT);
223 extrag = tally_histogram(extra, HISTO_WIDTH, HISTO_HEIGHT);
224 uncoalg = tally_histogram(uncoal, HISTO_WIDTH, HISTO_HEIGHT);
225 for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) {
226 bucketsg[i] = tally_histogram(buckets[i],
/* Estimate the output size: the format text, 20 characters for each of 33
 * numeric fields, the terminating NUL, and every histogram string present.
 * NOTE(review): the sprintf() calls below are unbounded, so this estimate
 * must be an upper bound on everything they emit - worth re-checking
 * whenever a format string changes. */
232 /* 20 is max length of a %llu. */
233 len = strlen(SUMMARY_FORMAT) + 33*20 + 1
234 + (zonesg ? strlen(zonesg) : 0)
235 + (hashesg ? strlen(hashesg) : 0)
236 + (freeg ? strlen(freeg) : 0)
237 + (keysg ? strlen(keysg) : 0)
238 + (datag ? strlen(datag) : 0)
239 + (extrag ? strlen(extrag) : 0)
240 + (uncoalg ? strlen(uncoalg) : 0);
/* Each reported bucket is budgeted using the longer (range) template plus
 * six numeric fields and its histogram. */
241 for (i = 0; i < max_bucket; i++) {
242 len += strlen(BUCKET_SUMMARY_FORMAT_B) + 6 * 20
243 + (bucketsg[i] ? strlen(bucketsg[i]) : 0);
/* Format the main summary; len now tracks how many characters were written.
 * NOTE(review): confirm that the tally_*() return types match the %zu
 * conversions in SUMMARY_FORMAT. */
250 len = sprintf(ret, SUMMARY_FORMAT,
251 (size_t)tdb->map_size,
252 tally_num(keys) + tally_num(data),
254 tally_min(zones), tally_mean(zones), tally_max(zones),
255 zonesg ? zonesg : "",
257 tally_min(keys), tally_mean(keys), tally_max(keys),
259 tally_min(data), tally_mean(data), tally_max(data),
261 tally_min(extra), tally_mean(extra), tally_max(extra),
262 extrag ? extrag : "",
264 tally_min(freet), tally_mean(freet), tally_max(freet),
266 tally_total(uncoal, NULL),
267 tally_min(uncoal), tally_mean(uncoal), tally_max(uncoal),
268 uncoalg ? uncoalg : "",
/* Top-level hash usage.  NOTE(review): count_hash() is declared to return
 * int while this value appears to be formatted with %u - confirm it cannot
 * be negative here. */
269 count_hash(tdb, offsetof(struct tdb_header, hashtable),
270 TDB_TOPLEVEL_HASH_BITS),
271 1 << TDB_TOPLEVEL_HASH_BITS,
273 tally_min(hashes), tally_mean(hashes), tally_max(hashes),
274 hashesg ? hashesg : "",
/* The remaining arguments are percentages of the mapped file size. */
275 tally_total(keys, NULL) * 100.0 / tdb->map_size,
276 tally_total(data, NULL) * 100.0 / tdb->map_size,
277 tally_total(extra, NULL) * 100.0 / tdb->map_size,
278 tally_total(freet, NULL) * 100.0 / tdb->map_size,
/* Record headers: one tdb_used_record-sized header per key, free record
 * and hash record. */
279 (tally_num(keys) + tally_num(freet) + tally_num(hashes))
280 * sizeof(struct tdb_used_record) * 100.0 / tdb->map_size,
/* Zone headers: one free_zone_header per zone plus the bucket offsets. */
281 (tally_num(zones) * sizeof(struct free_zone_header)
282 + total_buckets * sizeof(tdb_off_t))
283 * 100.0 / tdb->map_size,
/* Hash tables: the sub-level tables plus the top-level table (the first
 * line of this expression is not shown). */
285 * (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
286 + (sizeof(tdb_off_t) << TDB_TOPLEVEL_HASH_BITS))
287 * 100.0 / tdb->map_size);
/* Append one section per bucket, up to the largest bucket count seen. */
289 for (i = 0; i < max_bucket; i++) {
291 sizes_for_bucket(i, &min, &max);
/* Template A prints only min; the condition choosing between A and B is
 * not visible here (presumably min == max). */
293 len += sprintf(ret + len, BUCKET_SUMMARY_FORMAT_A,
294 min, tally_total(buckets[i], NULL),
295 tally_min(buckets[i]),
296 tally_mean(buckets[i]),
297 tally_max(buckets[i]),
298 bucketsg[i] ? bucketsg[i] : "");
/* Template B prints the min-max range. */
300 len += sprintf(ret + len, BUCKET_SUMMARY_FORMAT_B,
301 min, max, tally_total(buckets[i], NULL),
302 tally_min(buckets[i]),
303 tally_mean(buckets[i]),
304 tally_max(buckets[i]),
305 bucketsg[i] ? bucketsg[i] : "");
/* Final pass over every bucket slot; the loop body is not shown
 * (presumably it frees the per-bucket tallies and histograms). */
324 for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) {
/* Release both read locks taken at the start. */
329 tdb_allrecord_unlock(tdb, F_RDLCK);
330 tdb_unlock_expand(tdb, F_RDLCK);