tdb2: remove tailer
[ccan] / ccan / tdb2 / summary.c
1  /* 
2    Trivial Database 2: human-readable summary code
3    Copyright (C) Rusty Russell 2010
4    
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 3 of the License, or (at your option) any later version.
9
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with this library; if not, see <http://www.gnu.org/licenses/>.
17 */
18 #include "private.h"
19 #include <assert.h>
20 #include <ccan/tally/tally.h>
21
22 static void sizes_for_bucket(unsigned bucket, size_t *min, size_t *max)
23 {
24         if (bucket <= 8) {
25                 *min = *max = TDB_MIN_DATA_LEN + bucket * 8;
26         } else if (bucket == 9) {
27                 /* FIXME: This is twisted; fix size_to_bucket. */
28                 *min = TDB_MIN_DATA_LEN + (1ULL << (bucket - 3)) + 8;
29                 *max = TDB_MIN_DATA_LEN + (1ULL << (bucket - 2)) - 8;
30         } else {
31                 *min = TDB_MIN_DATA_LEN + (1ULL << (bucket - 3));
32                 *max = TDB_MIN_DATA_LEN + (1ULL << (bucket - 2)) - 8;
33         }
34         assert(size_to_bucket(63, *min) == bucket);
35         assert(size_to_bucket(63, *max) == bucket);
36         if (bucket > 8)
37                 assert(size_to_bucket(63, *min - 8) == bucket - 1);
38         assert(size_to_bucket(63, *max + 8) == bucket + 1);
39 }
40
41 static int count_hash(struct tdb_context *tdb,
42                       tdb_off_t hash_off, unsigned bits)
43 {
44         const tdb_off_t *h;
45         unsigned int i, count = 0;
46
47         h = tdb_access_read(tdb, hash_off, sizeof(*h) << bits, true);
48         if (!h)
49                 return -1;
50         for (i = 0; i < (1 << bits); i++)
51                 count += (h[i] != 0);
52
53         tdb_access_release(tdb, h);
54         return count;
55 }
56
57 static tdb_len_t summarize_zone(struct tdb_context *tdb, tdb_off_t zone_off,
58                                 struct tally *zones,
59                                 struct tally *hashes,
60                                 struct tally *free,
61                                 struct tally *keys,
62                                 struct tally *data,
63                                 struct tally *extra,
64                                 struct tally *uncoal,
65                                 uint64_t bucketlen[],
66                                 unsigned int *num_buckets)
67 {
68         struct free_zone_header zhdr;
69         tdb_off_t off, end;
70         tdb_len_t len;
71         unsigned int hdrlen;
72         tdb_len_t unc = 0;
73
74         if (tdb_read_convert(tdb, zone_off, &zhdr, sizeof(zhdr)) == -1)
75                 return TDB_OFF_ERR;
76
77         tally_add(zones, 1ULL << zhdr.zone_bits);
78         *num_buckets = BUCKETS_FOR_ZONE(zhdr.zone_bits);
79
80         hdrlen = sizeof(zhdr)
81                 + (BUCKETS_FOR_ZONE(zhdr.zone_bits) + 1) * sizeof(tdb_off_t);
82
83         end = zone_off + (1ULL << zhdr.zone_bits);
84         if (end > tdb->map_size)
85                 end = tdb->map_size;
86
87         for (off = zone_off + hdrlen; off < end; off += len) {
88                 union {
89                         struct tdb_used_record u;
90                         struct tdb_free_record f;
91                 } pad, *p;
92                 p = tdb_get(tdb, off, &pad, sizeof(pad));
93                 if (!p)
94                         return TDB_OFF_ERR;
95                 if (rec_magic(&p->u) != TDB_MAGIC) {
96                         len = p->f.data_len;
97                         tally_add(free, len);
98                         bucketlen[size_to_bucket(frec_zone_bits(&p->f), len)]++;
99                         len += sizeof(p->u);
100                         unc++;
101                 } else {
102                         if (unc) {
103                                 tally_add(uncoal, unc);
104                                 unc = 0;
105                         }
106                         len = sizeof(p->u)
107                                 + rec_key_length(&p->u)
108                                 + rec_data_length(&p->u)
109                                 + rec_extra_padding(&p->u);
110
111                         /* FIXME: Use different magic for hashes? */
112                         if (!rec_key_length(&p->u) && !rec_hash(&p->u)) {
113                                 int count = count_hash(tdb, off + sizeof(p->u),
114                                                        TDB_SUBLEVEL_HASH_BITS);
115                                 if (count == -1)
116                                         return TDB_OFF_ERR;
117                                 tally_add(hashes, count);
118                         } else {
119                                 tally_add(keys, rec_key_length(&p->u));
120                                 tally_add(data, rec_data_length(&p->u));
121                         }
122                         tally_add(extra, rec_extra_padding(&p->u));
123                 }
124         }
125         if (unc)
126                 tally_add(uncoal, unc);
127         return 1ULL << zhdr.zone_bits;
128 }
129
/*
 * printf-style template for the main part of the summary.  Every bare
 * "%s" is the slot for an optional histogram; tdb_summary() passes an
 * empty string there when no histogram was generated.
 */
#define SUMMARY_FORMAT \
	"Size of file/data: %zu/%zu\n" \
	"Number of zones: %zu\n" \
	"Smallest/average/largest zone size: %zu/%zu/%zu\n" \
	"%s" \
	"Number of records: %zu\n" \
	"Smallest/average/largest keys: %zu/%zu/%zu\n" \
	"%s" \
	"Smallest/average/largest data: %zu/%zu/%zu\n" \
	"%s" \
	"Smallest/average/largest padding: %zu/%zu/%zu\n" \
	"%s" \
	"Number of free records: %zu\n" \
	"Smallest/average/largest free records: %zu/%zu/%zu\n" \
	"%s" \
	"Number of uncoalesced records: %zu\n" \
	"Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n" \
	"%s" \
	"Toplevel hash used: %u of %u\n" \
	"Number of subhashes: %zu\n" \
	"Smallest/average/largest subhash entries: %zu/%zu/%zu\n" \
	"%s" \
	"Percentage keys/data/padding/free/rechdrs/zonehdrs/hashes: " \
	"%.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n"

/* Per-bucket section for a bucket that holds a single record length. */
#define BUCKET_SUMMARY_FORMAT_A \
	"Free bucket %zu: total entries %zu.\n" \
	"Smallest/average/largest length: %zu/%zu/%zu\n" \
	"%s"

/* Per-bucket section for a bucket that covers a range of lengths. */
#define BUCKET_SUMMARY_FORMAT_B \
	"Free bucket %zu-%zu: total entries %zu.\n" \
	"Smallest/average/largest length: %zu/%zu/%zu\n" \
	"%s"

/* Dimensions handed to tally_new() and tally_histogram(). */
#define HISTO_WIDTH 70
#define HISTO_HEIGHT 20
156
157 char *tdb_summary(struct tdb_context *tdb, enum tdb_summary_flags flags)
158 {
159         tdb_off_t off;
160         tdb_len_t len;
161         unsigned int i, num_buckets, max_bucket = 0;
162         uint64_t total_buckets = 0;
163         struct tally *zones, *hashes, *freet, *keys, *data, *extra, *uncoal,
164                 *buckets[BUCKETS_FOR_ZONE(63)+1] = { NULL };
165         char *zonesg, *hashesg, *freeg, *keysg, *datag, *extrag, *uncoalg,
166                 *bucketsg[BUCKETS_FOR_ZONE(63)+1] = { NULL };
167         char *ret = NULL;
168
169         zonesg = hashesg = freeg = keysg = datag = extrag = uncoalg = NULL;
170
171         if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false) != 0)
172                 return NULL;
173
174         if (tdb_lock_expand(tdb, F_RDLCK) != 0) {
175                 tdb_allrecord_unlock(tdb, F_RDLCK);
176                 return NULL;
177         }
178
179         /* Start stats off empty. */
180         zones = tally_new(HISTO_HEIGHT);
181         hashes = tally_new(HISTO_HEIGHT);
182         freet = tally_new(HISTO_HEIGHT);
183         keys = tally_new(HISTO_HEIGHT);
184         data = tally_new(HISTO_HEIGHT);
185         extra = tally_new(HISTO_HEIGHT);
186         uncoal = tally_new(HISTO_HEIGHT);
187         if (!zones || !hashes || !freet || !keys || !data || !extra
188             || !uncoal) {
189                 tdb->ecode = TDB_ERR_OOM;
190                 goto unlock;
191         }
192
193         for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) {
194                 buckets[i] = tally_new(HISTO_HEIGHT);
195                 if (!buckets[i]) {
196                         tdb->ecode = TDB_ERR_OOM;
197                         goto unlock;
198                 }
199         }
200
201         for (off = sizeof(struct tdb_header);
202              off < tdb->map_size - 1;
203              off += len) {
204                 uint64_t bucketlen[BUCKETS_FOR_ZONE(63)+1] = { 0 };
205                 len = summarize_zone(tdb, off, zones, hashes, freet, keys,
206                                      data, extra, uncoal, bucketlen,
207                                      &num_buckets);
208                 if (len == TDB_OFF_ERR)
209                         goto unlock;
210                 for (i = 0; i < num_buckets; i++)
211                         tally_add(buckets[i], bucketlen[i]);
212                 if (num_buckets > max_bucket)
213                         max_bucket = num_buckets;
214                 total_buckets += num_buckets;
215         }
216
217         if (flags & TDB_SUMMARY_HISTOGRAMS) {
218                 zonesg = tally_histogram(zones, HISTO_WIDTH, HISTO_HEIGHT);
219                 hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT);
220                 freeg = tally_histogram(freet, HISTO_WIDTH, HISTO_HEIGHT);
221                 keysg = tally_histogram(keys, HISTO_WIDTH, HISTO_HEIGHT);
222                 datag = tally_histogram(data, HISTO_WIDTH, HISTO_HEIGHT);
223                 extrag = tally_histogram(extra, HISTO_WIDTH, HISTO_HEIGHT);
224                 uncoalg = tally_histogram(uncoal, HISTO_WIDTH, HISTO_HEIGHT);
225                 for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) {
226                         bucketsg[i] = tally_histogram(buckets[i],
227                                                       HISTO_WIDTH,
228                                                       HISTO_HEIGHT);
229                 }
230         }
231
232         /* 20 is max length of a %llu. */
233         len = strlen(SUMMARY_FORMAT) + 33*20 + 1
234                 + (zonesg ? strlen(zonesg) : 0)
235                 + (hashesg ? strlen(hashesg) : 0)
236                 + (freeg ? strlen(freeg) : 0)
237                 + (keysg ? strlen(keysg) : 0)
238                 + (datag ? strlen(datag) : 0)
239                 + (extrag ? strlen(extrag) : 0)
240                 + (uncoalg ? strlen(uncoalg) : 0);
241         for (i = 0; i < max_bucket; i++) {
242                 len += strlen(BUCKET_SUMMARY_FORMAT_B) + 6 * 20
243                         + (bucketsg[i] ? strlen(bucketsg[i]) : 0);
244         }
245
246         ret = malloc(len);
247         if (!ret)
248                 goto unlock;
249
250         len = sprintf(ret, SUMMARY_FORMAT,
251                       (size_t)tdb->map_size,
252                       tally_num(keys) + tally_num(data),
253                       tally_num(zones),
254                       tally_min(zones), tally_mean(zones), tally_max(zones),
255                       zonesg ? zonesg : "",
256                       tally_num(keys),
257                       tally_min(keys), tally_mean(keys), tally_max(keys),
258                       keysg ? keysg : "",
259                       tally_min(data), tally_mean(data), tally_max(data),
260                       datag ? datag : "",
261                       tally_min(extra), tally_mean(extra), tally_max(extra),
262                       extrag ? extrag : "",
263                       tally_num(freet),
264                       tally_min(freet), tally_mean(freet), tally_max(freet),
265                       freeg ? freeg : "",
266                       tally_total(uncoal, NULL),
267                       tally_min(uncoal), tally_mean(uncoal), tally_max(uncoal),
268                       uncoalg ? uncoalg : "",
269                       count_hash(tdb, offsetof(struct tdb_header, hashtable),
270                                  TDB_TOPLEVEL_HASH_BITS),
271                       1 << TDB_TOPLEVEL_HASH_BITS,
272                       tally_num(hashes),
273                       tally_min(hashes), tally_mean(hashes), tally_max(hashes),
274                       hashesg ? hashesg : "",
275                       tally_total(keys, NULL) * 100.0 / tdb->map_size,
276                       tally_total(data, NULL) * 100.0 / tdb->map_size,
277                       tally_total(extra, NULL) * 100.0 / tdb->map_size,
278                       tally_total(freet, NULL) * 100.0 / tdb->map_size,
279                       (tally_num(keys) + tally_num(freet) + tally_num(hashes))
280                       * sizeof(struct tdb_used_record) * 100.0 / tdb->map_size,
281                       (tally_num(zones) * sizeof(struct free_zone_header)
282                        + total_buckets * sizeof(tdb_off_t))
283                       * 100.0 / tdb->map_size,
284                       (tally_num(hashes)
285                        * (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
286                        + (sizeof(tdb_off_t) << TDB_TOPLEVEL_HASH_BITS))
287                       * 100.0 / tdb->map_size);
288
289         for (i = 0; i < max_bucket; i++) {
290                 size_t min, max;
291                 sizes_for_bucket(i, &min, &max);
292                 if (min == max) {
293                         len += sprintf(ret + len, BUCKET_SUMMARY_FORMAT_A,
294                                        min, tally_total(buckets[i], NULL),
295                                        tally_min(buckets[i]),
296                                        tally_mean(buckets[i]),
297                                        tally_max(buckets[i]),
298                                        bucketsg[i] ? bucketsg[i] : "");
299                 } else {
300                         len += sprintf(ret + len, BUCKET_SUMMARY_FORMAT_B,
301                                        min, max, tally_total(buckets[i], NULL),
302                                        tally_min(buckets[i]),
303                                        tally_mean(buckets[i]),
304                                        tally_max(buckets[i]),
305                                        bucketsg[i] ? bucketsg[i] : "");
306                 }
307         }
308
309 unlock:
310         free(zonesg);
311         free(hashesg);
312         free(freeg);
313         free(keysg);
314         free(datag);
315         free(extrag);
316         free(uncoalg);
317         free(zones);
318         free(hashes);
319         free(freet);
320         free(keys);
321         free(data);
322         free(extra);
323         free(uncoal);
324         for (i = 0; i < sizeof(buckets)/sizeof(buckets[0]); i++) {
325                 free(buckets[i]);
326                 free(bucketsg[i]);
327         }
328
329         tdb_allrecord_unlock(tdb, F_RDLCK);
330         tdb_unlock_expand(tdb, F_RDLCK);
331         return ret;
332 }