]> git.ozlabs.org Git - ccan/blob - ccan/alloc/alloc.c
alloc: fix bug in tiny allocator.
[ccan] / ccan / alloc / alloc.c
1 #include <unistd.h>
2 #include <stdint.h>
3 #include <string.h>
4 #include <limits.h>
5 #include <assert.h>
6 #include <stdlib.h>
7 #include "alloc.h"
8 #include "bitops.h"
9 #include "tiny.h"
10 #include <ccan/build_assert/build_assert.h>
11 #include <ccan/likely/likely.h>
12 #include <ccan/short_types/short_types.h>
13 #include "config.h"
14
15 /*
16    Inspired by (and parts taken from) Andrew Tridgell's alloc_mmap:
17    http://samba.org/~tridge/junkcode/alloc_mmap/
18
19    Copyright (C) Andrew Tridgell 2007
20    
21    This library is free software; you can redistribute it and/or
22    modify it under the terms of the GNU Lesser General Public
23    License as published by the Free Software Foundation; either
24    version 2 of the License, or (at your option) any later version.
25
26    This library is distributed in the hope that it will be useful,
27    but WITHOUT ANY WARRANTY; without even the implied warranty of
28    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
29    Lesser General Public License for more details.
30
31    You should have received a copy of the GNU Lesser General Public
32    License along with this library; if not, write to the Free Software
33    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
34  */
35
36 #if 0 /* Until we have the tiny allocator working, go down to 64k */
37
38 /* We divide the pool into this many large pages (nearest power of 2) */
39 #define MAX_LARGE_PAGES (1024UL)
40
41 /* 32 small pages == 1 large page. */
42 #define BITS_FROM_SMALL_TO_LARGE_PAGE 5
43
44 #else
45
46 #define MAX_LARGE_PAGES (128UL)
47 #define BITS_FROM_SMALL_TO_LARGE_PAGE 4
48
49 #endif
50
51 #define MAX_SMALL_PAGES (MAX_LARGE_PAGES << BITS_FROM_SMALL_TO_LARGE_PAGE)
52
53 /* Smallest pool size for this scheme: 128-byte small pages.  That's
54  * 9/13% overhead for 32/64 bit. */
55 #define MIN_USEFUL_SIZE (MAX_SMALL_PAGES * 128)
56
57 /* Every 4 buckets, we jump up a power of 2. ...8 10 12 14 16 20 24 28 32... */
58 #define INTER_BUCKET_SPACE 4
59
60 #define SMALL_PAGES_PER_LARGE_PAGE (1 << BITS_FROM_SMALL_TO_LARGE_PAGE)
61
62 /* FIXME: Figure this out properly. */
63 #define MAX_SIZE (1 << 30)
64
65 /* How few object to fit in a page before using a larger one? (8) */
66 #define MAX_PAGE_OBJECT_ORDER   3
67
68 #define BITS_PER_LONG (sizeof(long) * CHAR_BIT)
69
70 struct bucket_state {
71         u32 elements_per_page;
72         u16 page_list;
73         u16 full_list;
74 };
75
76 struct header {
77         /* Bitmap of which pages are large. */
78         unsigned long pagesize[MAX_LARGE_PAGES / BITS_PER_LONG];
79
80         /* List of unused small/large pages. */
81         u16 small_free_list;
82         u16 large_free_list;
83
84         /* This is less defined: we have two buckets for each power of 2 */
85         struct bucket_state bs[1];
86 };
87
88 struct page_header {
89         u16 next, prev;
90         /* FIXME: We can just count all-0 and all-1 used[] elements. */
91         unsigned elements_used : 25;
92         unsigned bucket : 7;
93         unsigned long used[1]; /* One bit per element. */
94 };
95
96 /*
97  * Every 4 buckets, the size doubles.
98  * Between buckets, sizes increase linearly.
99  *
100  * eg. bucket 40 = 2^10                 = 1024
101  *     bucket 41 = 2^10 + 2^10*4        = 1024 + 256
102  *     bucket 42 = 2^10 + 2^10*4        = 1024 + 512
103  *     bucket 43 = 2^10 + 2^10*4        = 1024 + 768
104  *     bucket 45 = 2^11                 = 2048
105  *
106  * Care is taken to handle low numbered buckets, at cost of overflow.
107  */
108 static unsigned long bucket_to_size(unsigned int bucket)
109 {
110         unsigned long base = 1 << (bucket / INTER_BUCKET_SPACE);
111         return base + ((bucket % INTER_BUCKET_SPACE)
112                        << (bucket / INTER_BUCKET_SPACE))
113                 / INTER_BUCKET_SPACE;
114 }
115
116 /*
117  * Say size is 10.
118  *   fls(size/2) == 3.  1 << 3 == 8, so we're 2 too large, out of a possible
119  * 8 too large.  That's 1/4 of the way to the next power of 2 == 1 bucket.
120  *
121  * We make sure we round up.  Note that this fails on 32 bit at size
122  * 1879048193 (around bucket 120).
123  */
124 static unsigned int size_to_bucket(unsigned long size)
125 {
126         unsigned int base = fls(size/2);
127         unsigned long overshoot;
128
129         overshoot = size - (1 << base);
130         return base * INTER_BUCKET_SPACE
131                 + ((overshoot * INTER_BUCKET_SPACE + (1 << base)-1) >> base);
132 }
133
134 static unsigned int small_page_bits(unsigned long poolsize)
135 {
136         return fls(poolsize / MAX_SMALL_PAGES / 2);
137 }
138
139 static struct page_header *from_pgnum(struct header *head,
140                                       unsigned long pgnum,
141                                       unsigned sp_bits)
142 {
143         return (struct page_header *)((char *)head + (pgnum << sp_bits));
144 }
145
146 static u16 to_pgnum(struct header *head, void *p, unsigned sp_bits)
147 {
148         return ((char *)p - (char *)head) >> sp_bits;
149 }
150
151 static size_t used_size(unsigned int num_elements)
152 {
153         return align_up(num_elements, BITS_PER_LONG) / CHAR_BIT;
154 }
155
156 /*
157  * We always align the first entry to the lower power of 2.
158  * eg. the 12-byte bucket gets 8-byte aligned.  The 4096-byte bucket
159  * gets 4096-byte aligned.
160  */
161 static unsigned long page_header_size(unsigned int align_bits,
162                                       unsigned long num_elements)
163 {
164         unsigned long size;
165
166         size = sizeof(struct page_header)
167                 - sizeof(((struct page_header *)0)->used)
168                 + used_size(num_elements);
169         return align_up(size, 1 << align_bits);
170 }
171
172 static void add_to_list(struct header *head,
173                         u16 *list, struct page_header *ph, unsigned sp_bits)
174 {
175         unsigned long h = *list, offset = to_pgnum(head, ph, sp_bits);
176
177         ph->next = h;
178         if (h) {
179                 struct page_header *prev = from_pgnum(head, h, sp_bits);
180                 assert(prev->prev == 0);
181                 prev->prev = offset;
182         }
183         *list = offset;
184         ph->prev = 0;
185 }
186
187 static void del_from_list(struct header *head,
188                           u16 *list, struct page_header *ph, unsigned sp_bits)
189 {
190         /* Front of list? */
191         if (ph->prev == 0) {
192                 *list = ph->next;
193         } else {
194                 struct page_header *prev = from_pgnum(head, ph->prev, sp_bits);
195                 prev->next = ph->next;
196         }
197         if (ph->next != 0) {
198                 struct page_header *next = from_pgnum(head, ph->next, sp_bits);
199                 next->prev = ph->prev;
200         }
201 }
202
203 static u16 pop_from_list(struct header *head,
204                                    u16 *list,
205                                    unsigned int sp_bits)
206 {
207         u16 h = *list;
208         struct page_header *ph = from_pgnum(head, h, sp_bits);
209
210         if (likely(h)) {
211                 *list = ph->next;
212                 if (*list)
213                         from_pgnum(head, *list, sp_bits)->prev = 0;
214         }
215         return h;
216 }
217
218 static void add_small_page_to_freelist(struct header *head,
219                                        struct page_header *ph,
220                                        unsigned int sp_bits)
221 {
222         add_to_list(head, &head->small_free_list, ph, sp_bits);
223 }
224
225 static void add_large_page_to_freelist(struct header *head,
226                                        struct page_header *ph,
227                                        unsigned int sp_bits)
228 {
229         add_to_list(head, &head->large_free_list, ph, sp_bits);
230 }
231
232 static void add_to_bucket_list(struct header *head,
233                                struct bucket_state *bs,
234                                struct page_header *ph,
235                                unsigned int sp_bits)
236 {
237         add_to_list(head, &bs->page_list, ph, sp_bits);
238 }
239
240 static void del_from_bucket_list(struct header *head,
241                                  struct bucket_state *bs,
242                                  struct page_header *ph,
243                                  unsigned int sp_bits)
244 {
245         del_from_list(head, &bs->page_list, ph, sp_bits);
246 }
247
248 static void del_from_bucket_full_list(struct header *head,
249                                       struct bucket_state *bs,
250                                       struct page_header *ph,
251                                       unsigned int sp_bits)
252 {
253         del_from_list(head, &bs->full_list, ph, sp_bits);
254 }
255
256 static void add_to_bucket_full_list(struct header *head,
257                                     struct bucket_state *bs,
258                                     struct page_header *ph,
259                                     unsigned int sp_bits)
260 {
261         add_to_list(head, &bs->full_list, ph, sp_bits);
262 }
263
264 static void clear_bit(unsigned long bitmap[], unsigned int off)
265 {
266         bitmap[off / BITS_PER_LONG] &= ~(1 << (off % BITS_PER_LONG));
267 }
268
269 static bool test_bit(const unsigned long bitmap[], unsigned int off)
270 {
271         return bitmap[off / BITS_PER_LONG] & (1 << (off % BITS_PER_LONG));
272 }
273
274 static void set_bit(unsigned long bitmap[], unsigned int off)
275 {
276         bitmap[off / BITS_PER_LONG] |= (1 << (off % BITS_PER_LONG));
277 }
278
279 /* There must be a bit to be found. */
280 static unsigned int find_free_bit(const unsigned long bitmap[])
281 {
282         unsigned int i;
283
284         for (i = 0; bitmap[i] == -1UL; i++);
285         return (i*BITS_PER_LONG) + ffsl(~bitmap[i]) - 1;
286 }
287
288 /* How many elements can we fit in a page? */
289 static unsigned long elements_per_page(unsigned long align_bits,
290                                        unsigned long esize,
291                                        unsigned long psize)
292 {
293         unsigned long num, overhead;
294
295         /* First approximation: no extra room for bitmap. */
296         overhead = align_up(sizeof(struct page_header), 1 << align_bits);
297         num = (psize - overhead) / esize;
298
299         while (page_header_size(align_bits, num) + esize * num > psize)
300                 num--;
301         return num;
302 }
303
304 static bool large_page_bucket(unsigned int bucket, unsigned int sp_bits)
305 {
306         unsigned long max_smallsize;
307
308         /* Note: this doesn't take into account page header. */
309         max_smallsize = (1UL << sp_bits) >> MAX_PAGE_OBJECT_ORDER;
310
311         return bucket_to_size(bucket) > max_smallsize;
312 }
313
314 static unsigned int max_bucket(unsigned int lp_bits)
315 {
316         return (lp_bits - MAX_PAGE_OBJECT_ORDER) * INTER_BUCKET_SPACE;
317 }
318
319 void alloc_init(void *pool, unsigned long poolsize)
320 {
321         struct header *head = pool;
322         struct page_header *ph;
323         unsigned int lp_bits, sp_bits, num_buckets;
324         unsigned long header_size, i;
325
326         if (poolsize < MIN_USEFUL_SIZE) {
327                 tiny_alloc_init(pool, poolsize);
328                 return;
329         }
330
331         /* We rely on page numbers fitting in 16 bit. */
332         BUILD_ASSERT(MAX_SMALL_PAGES < 65536);
333         
334         sp_bits = small_page_bits(poolsize);
335         lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
336
337         num_buckets = max_bucket(lp_bits);
338
339         head = pool;
340         header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);
341
342         memset(head, 0, header_size);
343         for (i = 0; i < num_buckets; i++) {
344                 unsigned long pagesize;
345
346                 if (large_page_bucket(i, sp_bits))
347                         pagesize = 1UL << lp_bits;
348                 else
349                         pagesize = 1UL << sp_bits;
350
351                 head->bs[i].elements_per_page
352                         = elements_per_page(i / INTER_BUCKET_SPACE,
353                                             bucket_to_size(i),
354                                             pagesize);
355         }
356
357         /* They start as all large pages. */
358         memset(head->pagesize, 0xFF, sizeof(head->pagesize));
359         /* FIXME: small pages for last bit? */
360
361         /* Split first page into small pages. */
362         assert(header_size < (1UL << lp_bits));
363         clear_bit(head->pagesize, 0);
364
365         /* Skip over page(s) used by header, add rest to free list */
366         for (i = align_up(header_size, (1 << sp_bits)) >> sp_bits;
367              i < SMALL_PAGES_PER_LARGE_PAGE;
368              i++) {
369                 ph = from_pgnum(head, i, sp_bits);
370                 ph->elements_used = 0;
371                 add_small_page_to_freelist(head, ph, sp_bits);
372         }
373
374         /* Add the rest of the pages as large pages. */
375         i = SMALL_PAGES_PER_LARGE_PAGE;
376         while ((i << sp_bits) + (1 << lp_bits) <= poolsize) {
377                 ph = from_pgnum(head, i, sp_bits);
378                 ph->elements_used = 0;
379                 add_large_page_to_freelist(head, ph, sp_bits);
380                 i += SMALL_PAGES_PER_LARGE_PAGE;
381         }
382 }
383
384 /* A large page worth of small pages are free: delete them from free list. */
385 static void del_large_from_small_free_list(struct header *head,
386                                            struct page_header *ph,
387                                            unsigned int sp_bits)
388 {
389         unsigned long i;
390
391         for (i = 0; i < SMALL_PAGES_PER_LARGE_PAGE; i++) {
392                 del_from_list(head, &head->small_free_list,
393                               (void *)ph + (i << sp_bits),
394                               sp_bits);
395         }
396 }
397
398 static bool all_empty(struct header *head,
399                       unsigned long pgnum,
400                       unsigned sp_bits)
401 {
402         unsigned long i;
403
404         for (i = 0; i < SMALL_PAGES_PER_LARGE_PAGE; i++) {
405                 struct page_header *ph = from_pgnum(head, pgnum + i, sp_bits);
406                 if (ph->elements_used)
407                         return false;
408         }
409         return true;
410 }
411
412 static u16 get_large_page(struct header *head, unsigned long poolsize,
413                           unsigned int sp_bits)
414 {
415         unsigned int lp_bits, i, page;
416
417         lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
418
419         page = pop_from_list(head, &head->large_free_list, sp_bits);
420         if (likely(page))
421                 return page;
422
423         /* Look for small pages to coalesce, after first large page. */
424         for (i = SMALL_PAGES_PER_LARGE_PAGE;
425              i < (poolsize >> lp_bits) << BITS_FROM_SMALL_TO_LARGE_PAGE;
426              i += SMALL_PAGES_PER_LARGE_PAGE) {
427                 /* Already a large page? */
428                 if (test_bit(head->pagesize, i / SMALL_PAGES_PER_LARGE_PAGE))
429                         continue;
430                 if (all_empty(head, i, sp_bits)) {
431                         struct page_header *ph = from_pgnum(head, i, sp_bits);
432                         set_bit(head->pagesize,
433                                 i / SMALL_PAGES_PER_LARGE_PAGE);
434                         del_large_from_small_free_list(head, ph, sp_bits);
435                         add_large_page_to_freelist(head, ph, sp_bits);
436                 }
437         }
438                         
439         return pop_from_list(head, &head->large_free_list, sp_bits);
440 }
441
442 /* Returns small page. */
443 static unsigned long break_up_large_page(struct header *head,
444                                          unsigned int sp_bits,
445                                          u16 lpage)
446 {
447         unsigned int i;
448
449         clear_bit(head->pagesize, lpage >> BITS_FROM_SMALL_TO_LARGE_PAGE);
450
451         for (i = 1; i < SMALL_PAGES_PER_LARGE_PAGE; i++) {
452                 struct page_header *ph = from_pgnum(head, lpage + i, sp_bits);
453                 add_small_page_to_freelist(head, ph, sp_bits);
454         }
455
456         return lpage;
457 }
458
459 static u16 get_small_page(struct header *head, unsigned long poolsize,
460                           unsigned int sp_bits)
461 {
462         u16 ret;
463
464         ret = pop_from_list(head, &head->small_free_list, sp_bits);
465         if (likely(ret))
466                 return ret;
467         ret = get_large_page(head, poolsize, sp_bits);
468         if (likely(ret))
469                 ret = break_up_large_page(head, sp_bits, ret);
470         return ret;
471 }
472
473 void *alloc_get(void *pool, unsigned long poolsize,
474                 unsigned long size, unsigned long align)
475 {
476         struct header *head = pool;
477         unsigned int bucket;
478         unsigned long i;
479         struct bucket_state *bs;
480         struct page_header *ph;
481         unsigned int sp_bits;
482
483         if (poolsize < MIN_USEFUL_SIZE) {
484                 return tiny_alloc_get(pool, poolsize, size, align);
485         }
486
487         size = align_up(size, align);
488         if (unlikely(!size))
489                 size = 1;
490         bucket = size_to_bucket(size);
491
492         sp_bits = small_page_bits(poolsize);
493
494         if (bucket >= max_bucket(sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE)) {
495                 /* FIXME: huge alloc. */
496                 return NULL;
497         }
498
499         bs = &head->bs[bucket];
500
501         if (!bs->page_list) {
502                 struct page_header *ph;
503
504                 if (large_page_bucket(bucket, sp_bits))
505                         bs->page_list = get_large_page(head, poolsize,
506                                                        sp_bits);
507                 else
508                         bs->page_list = get_small_page(head, poolsize,
509                                                        sp_bits);
510                 /* FIXME: Try large-aligned alloc?  Header stuffing? */
511                 if (unlikely(!bs->page_list))
512                         return NULL;
513                 ph = from_pgnum(head, bs->page_list, sp_bits);
514                 ph->bucket = bucket;
515                 ph->elements_used = 0;
516                 ph->next = 0;
517                 memset(ph->used, 0, used_size(bs->elements_per_page));
518         }
519
520         ph = from_pgnum(head, bs->page_list, sp_bits);
521
522         i = find_free_bit(ph->used);
523         set_bit(ph->used, i);
524         ph->elements_used++;
525
526         /* check if this page is now full */
527         if (unlikely(ph->elements_used == bs->elements_per_page)) {
528                 del_from_bucket_list(head, bs, ph, sp_bits);
529                 add_to_bucket_full_list(head, bs, ph, sp_bits);
530         }
531
532         return (char *)ph + page_header_size(ph->bucket / INTER_BUCKET_SPACE,
533                                              bs->elements_per_page)
534                + i * bucket_to_size(bucket);
535 }
536
537 void alloc_free(void *pool, unsigned long poolsize, void *free)
538 {
539         struct header *head = pool;
540         struct bucket_state *bs;
541         unsigned int sp_bits;
542         unsigned long i, pgnum, pgoffset, offset = (char *)free - (char *)pool;
543         bool smallpage;
544         struct page_header *ph;
545
546         if (poolsize < MIN_USEFUL_SIZE) {
547                 return tiny_alloc_free(pool, poolsize, free);
548         }
549         
550         /* Get page header. */
551         sp_bits = small_page_bits(poolsize);
552         pgnum = offset >> sp_bits;
553
554         /* Big page? Round down further. */
555         if (test_bit(head->pagesize, pgnum >> BITS_FROM_SMALL_TO_LARGE_PAGE)) {
556                 smallpage = false;
557                 pgnum &= ~(SMALL_PAGES_PER_LARGE_PAGE - 1);
558         } else
559                 smallpage = true;
560
561         /* Step back to page header. */
562         ph = from_pgnum(head, pgnum, sp_bits);
563         bs = &head->bs[ph->bucket];
564         pgoffset = offset - (pgnum << sp_bits)
565                 - page_header_size(ph->bucket / INTER_BUCKET_SPACE,
566                                    bs->elements_per_page);
567
568         if (unlikely(ph->elements_used == bs->elements_per_page)) {
569                 del_from_bucket_full_list(head, bs, ph, sp_bits);
570                 add_to_bucket_list(head, bs, ph, sp_bits);
571         }
572
573         /* Which element are we? */
574         i = pgoffset / bucket_to_size(ph->bucket);
575         clear_bit(ph->used, i);
576         ph->elements_used--;
577
578         if (unlikely(ph->elements_used == 0)) {
579                 bs = &head->bs[ph->bucket];
580                 del_from_bucket_list(head, bs, ph, sp_bits);
581                 if (smallpage)
582                         add_small_page_to_freelist(head, ph, sp_bits);
583                 else
584                         add_large_page_to_freelist(head, ph, sp_bits);
585         }
586 }
587
588 unsigned long alloc_size(void *pool, unsigned long poolsize, void *p)
589 {
590         struct header *head = pool;
591         unsigned int pgnum, sp_bits;
592         unsigned long offset = (char *)p - (char *)pool;
593         struct page_header *ph;
594
595         if (poolsize < MIN_USEFUL_SIZE)
596                 return tiny_alloc_size(pool, poolsize, p);
597
598         /* Get page header. */
599         sp_bits = small_page_bits(poolsize);
600         pgnum = offset >> sp_bits;
601
602         /* Big page? Round down further. */
603         if (test_bit(head->pagesize, pgnum >> BITS_FROM_SMALL_TO_LARGE_PAGE))
604                 pgnum &= ~(SMALL_PAGES_PER_LARGE_PAGE - 1);
605
606         /* Step back to page header. */
607         ph = from_pgnum(head, pgnum, sp_bits);
608         return bucket_to_size(ph->bucket);
609 }
610
611 /* Useful for gdb breakpoints. */
612 static bool check_fail(void)
613 {
614         return false;
615 }
616
617 static unsigned long count_bits(const unsigned long bitmap[],
618                                 unsigned long limit)
619 {
620         unsigned long i, count = 0;
621
622         while (limit >= BITS_PER_LONG) {
623                 count += popcount(bitmap[0]);
624                 bitmap++;
625                 limit -= BITS_PER_LONG;
626         }
627
628         for (i = 0; i < limit; i++)
629                 if (test_bit(bitmap, i))
630                         count++;
631         return count;
632 }
633
634 static bool out_of_bounds(unsigned long pgnum,
635                           unsigned int sp_bits,
636                           unsigned long pagesize,
637                           unsigned long poolsize)
638 {
639         if (((pgnum << sp_bits) >> sp_bits) != pgnum)
640                 return true;
641
642         if ((pgnum << sp_bits) > poolsize)
643                 return true;
644
645         return ((pgnum << sp_bits) + pagesize > poolsize);
646 }
647
648 static bool check_bucket(struct header *head,
649                          unsigned long poolsize,
650                          unsigned long pages[],
651                          struct bucket_state *bs,
652                          unsigned int bindex)
653 {
654         bool lp_bucket;
655         struct page_header *ph;
656         unsigned long taken, i, prev, pagesize, sp_bits, lp_bits;
657
658         sp_bits = small_page_bits(poolsize);
659         lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
660
661         lp_bucket = large_page_bucket(bindex, sp_bits);
662
663         pagesize = 1UL << (lp_bucket ? lp_bits : sp_bits);
664
665         /* This many elements fit? */
666         taken = page_header_size(bindex / INTER_BUCKET_SPACE,
667                                  bs->elements_per_page);
668         taken += bucket_to_size(bindex) * bs->elements_per_page;
669         if (taken > pagesize)
670                 return check_fail();
671
672         /* One more wouldn't fit? */
673         taken = page_header_size(bindex / INTER_BUCKET_SPACE,
674                                  bs->elements_per_page + 1);
675         taken += bucket_to_size(bindex) * (bs->elements_per_page + 1);
676         if (taken <= pagesize)
677                 return check_fail();
678
679         /* Walk used list. */
680         prev = 0;
681         for (i = bs->page_list; i; i = ph->next) {
682                 /* Bad pointer? */
683                 if (out_of_bounds(i, sp_bits, pagesize, poolsize))
684                         return check_fail();
685                 /* Wrong size page? */
686                 if (!!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE)
687                     != lp_bucket)
688                         return check_fail();
689                 /* Large page not on boundary? */
690                 if (lp_bucket && (i % SMALL_PAGES_PER_LARGE_PAGE) != 0)
691                         return check_fail();
692                 ph = from_pgnum(head, i, sp_bits);
693                 /* Linked list corrupt? */
694                 if (ph->prev != prev)
695                         return check_fail();
696                 /* Already seen this page? */
697                 if (test_bit(pages, i))
698                         return check_fail();
699                 set_bit(pages, i);
700                 /* Empty or full? */
701                 if (ph->elements_used == 0)
702                         return check_fail();
703                 if (ph->elements_used >= bs->elements_per_page)
704                         return check_fail();
705                 /* Used bits don't agree? */
706                 if (ph->elements_used != count_bits(ph->used,
707                                                     bs->elements_per_page))
708                         return check_fail();
709                 /* Wrong bucket? */
710                 if (ph->bucket != bindex)
711                         return check_fail();
712                 prev = i;
713         }
714
715         /* Walk full list. */
716         prev = 0;
717         for (i = bs->full_list; i; i = ph->next) {
718                 /* Bad pointer? */
719                 if (out_of_bounds(i, sp_bits, pagesize, poolsize))
720                         return check_fail();
721                 /* Wrong size page? */
722                 if (!!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE)
723                     != lp_bucket)
724                 /* Large page not on boundary? */
725                 if (lp_bucket && (i % SMALL_PAGES_PER_LARGE_PAGE) != 0)
726                         return check_fail();
727                 ph = from_pgnum(head, i, sp_bits);
728                 /* Linked list corrupt? */
729                 if (ph->prev != prev)
730                         return check_fail();
731                 /* Already seen this page? */
732                 if (test_bit(pages, i))
733                         return check_fail();
734                 set_bit(pages, i);
735                 /* Not full? */
736                 if (ph->elements_used != bs->elements_per_page)
737                         return check_fail();
738                 /* Used bits don't agree? */
739                 if (ph->elements_used != count_bits(ph->used,
740                                                     bs->elements_per_page))
741                         return check_fail();
742                 /* Wrong bucket? */
743                 if (ph->bucket != bindex)
744                         return check_fail();
745                 prev = i;
746         }
747         return true;
748 }
749
750 bool alloc_check(void *pool, unsigned long poolsize)
751 {
752         struct header *head = pool;
753         unsigned long prev, i, lp_bits, sp_bits, header_size, num_buckets;
754         struct page_header *ph;
755         unsigned long pages[MAX_SMALL_PAGES / BITS_PER_LONG] = { 0 };
756
757         if (poolsize < MIN_USEFUL_SIZE)
758                 return tiny_alloc_check(pool, poolsize);
759
760         sp_bits = small_page_bits(poolsize);
761         lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
762
763         num_buckets = max_bucket(lp_bits);
764
765         header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);
766
767         /* First, set all bits taken by header. */
768         for (i = 0; i < header_size; i += (1UL << sp_bits))
769                 set_bit(pages, i >> sp_bits);
770
771         /* Check small page free list. */
772         prev = 0;
773         for (i = head->small_free_list; i; i = ph->next) {
774                 /* Bad pointer? */
775                 if (out_of_bounds(i, sp_bits, 1 << sp_bits, poolsize))
776                         return check_fail();
777                 /* Large page? */
778                 if (test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE))
779                         return check_fail();
780                 ph = from_pgnum(head, i, sp_bits);
781                 /* Linked list corrupt? */
782                 if (ph->prev != prev)
783                         return check_fail();
784                 /* Already seen this page? */
785                 if (test_bit(pages, i))
786                         return check_fail();
787                 set_bit(pages, i);
788                 prev = i;
789         }
790
791         /* Check large page free list. */
792         prev = 0;
793         for (i = head->large_free_list; i; i = ph->next) {
794                 /* Bad pointer? */
795                 if (out_of_bounds(i, sp_bits, 1 << lp_bits, poolsize))
796                         return check_fail();
797                 /* Not large page? */
798                 if (!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE))
799                         return check_fail();
800                 /* Not page boundary? */
801                 if ((i % SMALL_PAGES_PER_LARGE_PAGE) != 0)
802                         return check_fail();
803                 ph = from_pgnum(head, i, sp_bits);
804                 /* Linked list corrupt? */
805                 if (ph->prev != prev)
806                         return check_fail();
807                 /* Already seen this page? */
808                 if (test_bit(pages, i))
809                         return check_fail();
810                 set_bit(pages, i);
811                 prev = i;
812         }
813
814         /* Check the buckets. */
815         for (i = 0; i < max_bucket(lp_bits); i++) {
816                 struct bucket_state *bs = &head->bs[i];
817
818                 if (!check_bucket(head, poolsize, pages, bs, i))
819                         return false;
820         }
821
822         /* Make sure every page accounted for. */
823         for (i = 0; i < poolsize >> sp_bits; i++) {
824                 if (!test_bit(pages, i))
825                         return check_fail();
826                 if (test_bit(head->pagesize,
827                              i >> BITS_FROM_SMALL_TO_LARGE_PAGE)) {
828                         /* Large page, skip rest. */
829                         i += SMALL_PAGES_PER_LARGE_PAGE - 1;
830                 }
831         }
832
833         return true;
834 }
835
836 static unsigned long print_overhead(FILE *out, const char *desc,
837                                     unsigned long bytes,
838                                     unsigned long poolsize)
839 {
840         fprintf(out, "Overhead (%s): %lu bytes (%.3g%%)\n",
841                 desc, bytes, 100.0 * bytes / poolsize);
842         return bytes;
843 }
844
845 static unsigned long count_list(struct header *head,
846                                 u16 pgnum,
847                                 unsigned int sp_bits,
848                                 unsigned long *total_elems)
849 {
850         struct page_header *p;
851         unsigned long ret = 0;
852
853         while (pgnum) {
854                 p = from_pgnum(head, pgnum, sp_bits);
855                 if (total_elems)
856                         (*total_elems) += p->elements_used;
857                 ret++;
858                 pgnum = p->next;
859         }
860         return ret;
861 }
862
863 static unsigned long visualize_bucket(FILE *out, struct header *head,
864                                       unsigned int bucket,
865                                       unsigned long poolsize,
866                                       unsigned int sp_bits)
867 {
868         unsigned long num_full, num_partial, num_pages, page_size,
869                 elems, hdr_min, hdr_size, elems_per_page, overhead = 0;
870
871         elems_per_page = head->bs[bucket].elements_per_page;
872
873         /* If we used byte-based bitmaps, we could get pg hdr to: */
874         hdr_min = sizeof(struct page_header)
875                 - sizeof(((struct page_header *)0)->used)
876                 + align_up(elems_per_page, CHAR_BIT) / CHAR_BIT;
877         hdr_size = page_header_size(bucket / INTER_BUCKET_SPACE,
878                                     elems_per_page);
879
880         elems = 0;
881         num_full = count_list(head, head->bs[bucket].full_list, sp_bits,
882                               &elems);
883         num_partial = count_list(head, head->bs[bucket].page_list, sp_bits,
884                                  &elems);
885         num_pages = num_full + num_partial;
886         if (!num_pages)
887                 return 0;
888
889         fprintf(out, "Bucket %u (%lu bytes):"
890                 " %lu full, %lu partial = %lu elements\n",
891                 bucket, bucket_to_size(bucket), num_full, num_partial, elems);
892         /* Strict requirement of page header size. */
893         overhead += print_overhead(out, "page headers",
894                                    hdr_min * num_pages, poolsize);
895         /* Gap between minimal page header and actual start. */
896         overhead += print_overhead(out, "page post-header alignments",
897                                    (hdr_size - hdr_min) * num_pages, poolsize);
898         /* Between last element and end of page. */
899         page_size = (1 << sp_bits);
900         if (large_page_bucket(bucket, sp_bits))
901                 page_size <<= BITS_FROM_SMALL_TO_LARGE_PAGE;
902
903         overhead += print_overhead(out, "page tails",
904                                    (page_size - (hdr_size
905                                                  + (elems_per_page
906                                                     * bucket_to_size(bucket))))
907                                    * num_pages, poolsize);
908         return overhead;
909 }
910
911 void alloc_visualize(FILE *out, void *pool, unsigned long poolsize)
912 {
913         struct header *head = pool;
914         unsigned long i, lp_bits, sp_bits, header_size, num_buckets, count,
915                 overhead = 0;
916
917         fprintf(out, "Pool %p size %lu: (%s allocator)\n", pool, poolsize,
918                 poolsize < MIN_USEFUL_SIZE ? "tiny" : "standard");
919
920         if (poolsize < MIN_USEFUL_SIZE) {
921                 tiny_alloc_visualize(out, pool, poolsize);
922                 return;
923         }
924         
925         sp_bits = small_page_bits(poolsize);
926         lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
927
928         num_buckets = max_bucket(lp_bits);
929         header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);
930
931         fprintf(out, "Large page size %lu, small page size %lu.\n",
932                 1UL << lp_bits, 1UL << sp_bits);
933         overhead += print_overhead(out, "unused pool tail",
934                                    poolsize % (1 << lp_bits), poolsize);
935         fprintf(out, "Main header %lu bytes (%lu small pages).\n",
936                 header_size, align_up(header_size, 1 << sp_bits) >> sp_bits);
937         overhead += print_overhead(out, "partial header page",
938                                    align_up(header_size, 1 << sp_bits)
939                                    - header_size, poolsize);
940         /* Total large pages. */
941         i = count_bits(head->pagesize, poolsize >> lp_bits);
942         /* Used pages. */
943         count = i - count_list(head, head->large_free_list, sp_bits, NULL);
944         fprintf(out, "%lu/%lu large pages used (%.3g%%)\n",
945                 count, i, count ? 100.0 * count / i : 0.0);
946
947         /* Total small pages. */
948         i = (poolsize >> lp_bits) - i;
949         /* Used pages */
950         count = i - count_list(head, head->small_free_list, sp_bits, NULL);
951         fprintf(out, "%lu/%lu small pages used (%.3g%%)\n",
952                 count, i, count ? 100.0 * count / i : 0.0);
953
954         /* Summary of each bucket. */
955         fprintf(out, "%lu buckets:\n", num_buckets);
956         for (i = 0; i < num_buckets; i++)
957                 overhead += visualize_bucket(out, head, i, poolsize, sp_bits);
958
959         print_overhead(out, "total", overhead, poolsize);
960 }