alloc: reduce page header further, go down to 64k minimum.
[ccan] / ccan / alloc / alloc.c
1 #include <unistd.h>
2 #include <stdint.h>
3 #include <string.h>
4 #include <limits.h>
5 #include <assert.h>
6 #include <stdlib.h>
7 #include "alloc.h"
8 #include <ccan/build_assert/build_assert.h>
9 #include <ccan/likely/likely.h>
10 #include <ccan/short_types/short_types.h>
11 #include "config.h"
12
13 /*
14    Inspired by (and parts taken from) Andrew Tridgell's alloc_mmap:
15    http://samba.org/~tridge/junkcode/alloc_mmap/
16
17    Copyright (C) Andrew Tridgell 2007
18    
19    This library is free software; you can redistribute it and/or
20    modify it under the terms of the GNU Lesser General Public
21    License as published by the Free Software Foundation; either
22    version 2 of the License, or (at your option) any later version.
23
24    This library is distributed in the hope that it will be useful,
25    but WITHOUT ANY WARRANTY; without even the implied warranty of
26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27    Lesser General Public License for more details.
28
29    You should have received a copy of the GNU Lesser General Public
30    License along with this library; if not, write to the Free Software
31    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
32  */
33
34 #if 0 /* Until we have the tiny allocator working, go down to 64k */
35
36 /* We divide the pool into this many large pages (nearest power of 2) */
37 #define MAX_LARGE_PAGES (1024UL)
38
39 /* 32 small pages == 1 large page. */
40 #define BITS_FROM_SMALL_TO_LARGE_PAGE 5
41
42 #else
43
44 #define MAX_LARGE_PAGES (128UL)
45 #define BITS_FROM_SMALL_TO_LARGE_PAGE 4
46
47 #endif
48
49 #define MAX_SMALL_PAGES (MAX_LARGE_PAGES << BITS_FROM_SMALL_TO_LARGE_PAGE)
50
51 /* Smallest pool size for this scheme: 128-byte small pages.  That's
52  * 9/13% overhead for 32/64 bit. */
53 #define MIN_USEFUL_SIZE (MAX_SMALL_PAGES * 128)
54
55 /* Every 4 buckets, we jump up a power of 2. ...8 10 12 14 16 20 24 28 32... */
56 #define INTER_BUCKET_SPACE 4
57
58 #define SMALL_PAGES_PER_LARGE_PAGE (1 << BITS_FROM_SMALL_TO_LARGE_PAGE)
59
60 /* FIXME: Figure this out properly. */
61 #define MAX_SIZE (1 << 30)
62
63 /* How few object to fit in a page before using a larger one? (8) */
64 #define MAX_PAGE_OBJECT_ORDER   3
65
66 #define BITS_PER_LONG (sizeof(long) * CHAR_BIT)
67
68 struct bucket_state {
69         u32 elements_per_page;
70         u16 page_list;
71         u16 full_list;
72 };
73
74 struct header {
75         /* Bitmap of which pages are large. */
76         unsigned long pagesize[MAX_LARGE_PAGES / BITS_PER_LONG];
77
78         /* List of unused small/large pages. */
79         u16 small_free_list;
80         u16 large_free_list;
81
82         /* This is less defined: we have two buckets for each power of 2 */
83         struct bucket_state bs[1];
84 };
85
86 struct page_header {
87         u16 next, prev;
88         /* FIXME: We can just count all-0 and all-1 used[] elements. */
89         unsigned elements_used : 25;
90         unsigned bucket : 7;
91         unsigned long used[1]; /* One bit per element. */
92 };
93
94 /* 2 bit for every byte to allocate. */
95 static void tiny_alloc_init(void *pool, unsigned long poolsize)
96 {
97 /* FIXME */
98 }
99
100 static void *tiny_alloc_get(void *pool, unsigned long poolsize,
101                             unsigned long size, unsigned long align)
102 {
103 /* FIXME */
104         return NULL;
105 }
106
107 static void tiny_alloc_free(void *pool, unsigned long poolsize, void *free)
108 {
109 /* FIXME */
110 }
111
112 static unsigned long tiny_alloc_size(void *pool, unsigned long poolsize,
113                                       void *p)
114 {
115 /* FIXME */
116         return 0;
117 }
118
119 static bool tiny_alloc_check(void *pool, unsigned long poolsize)
120 {
121 /* FIXME */
122         return true;
123 }
124
125 static unsigned int fls(unsigned long val)
126 {
127 #if HAVE_BUILTIN_CLZL
128         /* This is significantly faster! */
129         return val ? sizeof(long) * CHAR_BIT - __builtin_clzl(val) : 0;
130 #else
131         unsigned int r = 32;
132
133         if (!val)
134                 return 0;
135         if (!(val & 0xffff0000u)) {
136                 val <<= 16;
137                 r -= 16;
138         }
139         if (!(val & 0xff000000u)) {
140                 val <<= 8;
141                 r -= 8;
142         }
143         if (!(val & 0xf0000000u)) {
144                 val <<= 4;
145                 r -= 4;
146         }
147         if (!(val & 0xc0000000u)) {
148                 val <<= 2;
149                 r -= 2;
150         }
151         if (!(val & 0x80000000u)) {
152                 val <<= 1;
153                 r -= 1;
154         }
155         return r;
156 #endif
157 }
158
159 /* FIXME: Move to bitops. */
160 static unsigned int ffsl(unsigned long val)
161 {
162 #if HAVE_BUILTIN_FFSL
163         /* This is significantly faster! */
164         return __builtin_ffsl(val);
165 #else
166         unsigned int r = 1;
167
168         if (!val)
169                 return 0;
170         if (sizeof(long) == sizeof(u64)) {
171                 if (!(val & 0xffffffff)) {
172                         /* Workaround gcc warning on 32-bit:
173                            error: right shift count >= width of type */
174                         u64 tmp = val;
175                         tmp >>= 32;
176                         val = tmp;
177                         r += 32;
178                 }
179         }
180         if (!(val & 0xffff)) {
181                 val >>= 16;
182                 r += 16;
183         }
184         if (!(val & 0xff)) {
185                 val >>= 8;
186                 r += 8;
187         }
188         if (!(val & 0xf)) {
189                 val >>= 4;
190                 r += 4;
191         }
192         if (!(val & 3)) {
193                 val >>= 2;
194                 r += 2;
195         }
196         if (!(val & 1)) {
197                 val >>= 1;
198                 r += 1;
199         }
200         return r;
201 #endif
202 }
203
204 static unsigned int popcount(unsigned long val)
205 {
206 #if HAVE_BUILTIN_POPCOUNTL
207         return __builtin_popcountl(val);
208 #else
209         if (sizeof(long) == sizeof(u64)) {
210                 u64 v = val;
211                 v = (v & 0x5555555555555555ULL)
212                         + ((v >> 1) & 0x5555555555555555ULL);
213                 v = (v & 0x3333333333333333ULL)
214                         + ((v >> 1) & 0x3333333333333333ULL);
215                 v = (v & 0x0F0F0F0F0F0F0F0FULL)
216                         + ((v >> 1) & 0x0F0F0F0F0F0F0F0FULL);
217                 v = (v & 0x00FF00FF00FF00FFULL)
218                         + ((v >> 1) & 0x00FF00FF00FF00FFULL);
219                 v = (v & 0x0000FFFF0000FFFFULL)
220                         + ((v >> 1) & 0x0000FFFF0000FFFFULL);
221                 v = (v & 0x00000000FFFFFFFFULL)
222                         + ((v >> 1) & 0x00000000FFFFFFFFULL);
223                 return v;
224         }
225         val = (val & 0x55555555ULL) + ((val >> 1) & 0x55555555ULL);
226         val = (val & 0x33333333ULL) + ((val >> 1) & 0x33333333ULL);
227         val = (val & 0x0F0F0F0FULL) + ((val >> 1) & 0x0F0F0F0FULL);
228         val = (val & 0x00FF00FFULL) + ((val >> 1) & 0x00FF00FFULL);
229         val = (val & 0x0000FFFFULL) + ((val >> 1) & 0x0000FFFFULL);
230         return val;
231 #endif
232 }
233
234 /*
235  * Every 4 buckets, the size doubles.
236  * Between buckets, sizes increase linearly.
237  *
238  * eg. bucket 40 = 2^10                 = 1024
239  *     bucket 41 = 2^10 + 2^10*4        = 1024 + 256
240  *     bucket 42 = 2^10 + 2^10*4        = 1024 + 512
241  *     bucket 43 = 2^10 + 2^10*4        = 1024 + 768
242  *     bucket 45 = 2^11                 = 2048
243  *
244  * Care is taken to handle low numbered buckets, at cost of overflow.
245  */
246 static unsigned long bucket_to_size(unsigned int bucket)
247 {
248         unsigned long base = 1 << (bucket / INTER_BUCKET_SPACE);
249         return base + ((bucket % INTER_BUCKET_SPACE)
250                        << (bucket / INTER_BUCKET_SPACE))
251                 / INTER_BUCKET_SPACE;
252 }
253
254 /*
255  * Say size is 10.
256  *   fls(size/2) == 3.  1 << 3 == 8, so we're 2 too large, out of a possible
257  * 8 too large.  That's 1/4 of the way to the next power of 2 == 1 bucket.
258  *
259  * We make sure we round up.  Note that this fails on 32 bit at size
260  * 1879048193 (around bucket 120).
261  */
262 static unsigned int size_to_bucket(unsigned long size)
263 {
264         unsigned int base = fls(size/2);
265         unsigned long overshoot;
266
267         overshoot = size - (1 << base);
268         return base * INTER_BUCKET_SPACE
269                 + ((overshoot * INTER_BUCKET_SPACE + (1 << base)-1) >> base);
270 }
271
272 static unsigned int small_page_bits(unsigned long poolsize)
273 {
274         return fls(poolsize / MAX_SMALL_PAGES / 2);
275 }
276
277 static unsigned long align_up(unsigned long x, unsigned long align)
278 {
279         return (x + align - 1) & ~(align - 1);
280 }
281
282 static struct page_header *from_pgnum(struct header *head,
283                                       unsigned long pgnum,
284                                       unsigned sp_bits)
285 {
286         return (struct page_header *)((char *)head + (pgnum << sp_bits));
287 }
288
289 static u16 to_pgnum(struct header *head, void *p, unsigned sp_bits)
290 {
291         return ((char *)p - (char *)head) >> sp_bits;
292 }
293
294 static size_t used_size(unsigned int num_elements)
295 {
296         return align_up(num_elements, BITS_PER_LONG) / CHAR_BIT;
297 }
298
299 /*
300  * We always align the first entry to the lower power of 2.
301  * eg. the 12-byte bucket gets 8-byte aligned.  The 4096-byte bucket
302  * gets 4096-byte aligned.
303  */
304 static unsigned long page_header_size(unsigned int align_bits,
305                                       unsigned long num_elements)
306 {
307         unsigned long size;
308
309         size = sizeof(struct page_header)
310                 - sizeof(((struct page_header *)0)->used)
311                 + used_size(num_elements);
312         return align_up(size, 1 << align_bits);
313 }
314
315 static void add_to_list(struct header *head,
316                         u16 *list, struct page_header *ph, unsigned sp_bits)
317 {
318         unsigned long h = *list, offset = to_pgnum(head, ph, sp_bits);
319
320         ph->next = h;
321         if (h) {
322                 struct page_header *prev = from_pgnum(head, h, sp_bits);
323                 assert(prev->prev == 0);
324                 prev->prev = offset;
325         }
326         *list = offset;
327         ph->prev = 0;
328 }
329
330 static void del_from_list(struct header *head,
331                           u16 *list, struct page_header *ph, unsigned sp_bits)
332 {
333         /* Front of list? */
334         if (ph->prev == 0) {
335                 *list = ph->next;
336         } else {
337                 struct page_header *prev = from_pgnum(head, ph->prev, sp_bits);
338                 prev->next = ph->next;
339         }
340         if (ph->next != 0) {
341                 struct page_header *next = from_pgnum(head, ph->next, sp_bits);
342                 next->prev = ph->prev;
343         }
344 }
345
346 static u16 pop_from_list(struct header *head,
347                                    u16 *list,
348                                    unsigned int sp_bits)
349 {
350         u16 h = *list;
351         struct page_header *ph = from_pgnum(head, h, sp_bits);
352
353         if (likely(h)) {
354                 *list = ph->next;
355                 if (*list)
356                         from_pgnum(head, *list, sp_bits)->prev = 0;
357         }
358         return h;
359 }
360
361 static void add_small_page_to_freelist(struct header *head,
362                                        struct page_header *ph,
363                                        unsigned int sp_bits)
364 {
365         add_to_list(head, &head->small_free_list, ph, sp_bits);
366 }
367
368 static void add_large_page_to_freelist(struct header *head,
369                                        struct page_header *ph,
370                                        unsigned int sp_bits)
371 {
372         add_to_list(head, &head->large_free_list, ph, sp_bits);
373 }
374
375 static void add_to_bucket_list(struct header *head,
376                                struct bucket_state *bs,
377                                struct page_header *ph,
378                                unsigned int sp_bits)
379 {
380         add_to_list(head, &bs->page_list, ph, sp_bits);
381 }
382
383 static void del_from_bucket_list(struct header *head,
384                                  struct bucket_state *bs,
385                                  struct page_header *ph,
386                                  unsigned int sp_bits)
387 {
388         del_from_list(head, &bs->page_list, ph, sp_bits);
389 }
390
391 static void del_from_bucket_full_list(struct header *head,
392                                       struct bucket_state *bs,
393                                       struct page_header *ph,
394                                       unsigned int sp_bits)
395 {
396         del_from_list(head, &bs->full_list, ph, sp_bits);
397 }
398
399 static void add_to_bucket_full_list(struct header *head,
400                                     struct bucket_state *bs,
401                                     struct page_header *ph,
402                                     unsigned int sp_bits)
403 {
404         add_to_list(head, &bs->full_list, ph, sp_bits);
405 }
406
407 static void clear_bit(unsigned long bitmap[], unsigned int off)
408 {
409         bitmap[off / BITS_PER_LONG] &= ~(1 << (off % BITS_PER_LONG));
410 }
411
412 static bool test_bit(const unsigned long bitmap[], unsigned int off)
413 {
414         return bitmap[off / BITS_PER_LONG] & (1 << (off % BITS_PER_LONG));
415 }
416
417 static void set_bit(unsigned long bitmap[], unsigned int off)
418 {
419         bitmap[off / BITS_PER_LONG] |= (1 << (off % BITS_PER_LONG));
420 }
421
422 /* There must be a bit to be found. */
423 static unsigned int find_free_bit(const unsigned long bitmap[])
424 {
425         unsigned int i;
426
427         for (i = 0; bitmap[i] == -1UL; i++);
428         return (i*BITS_PER_LONG) + ffsl(~bitmap[i]) - 1;
429 }
430
431 /* How many elements can we fit in a page? */
432 static unsigned long elements_per_page(unsigned long align_bits,
433                                        unsigned long esize,
434                                        unsigned long psize)
435 {
436         unsigned long num, overhead;
437
438         /* First approximation: no extra room for bitmap. */
439         overhead = align_up(sizeof(struct page_header), 1 << align_bits);
440         num = (psize - overhead) / esize;
441
442         while (page_header_size(align_bits, num) + esize * num > psize)
443                 num--;
444         return num;
445 }
446
447 static bool large_page_bucket(unsigned int bucket, unsigned int sp_bits)
448 {
449         unsigned long max_smallsize;
450
451         /* Note: this doesn't take into account page header. */
452         max_smallsize = (1UL << sp_bits) >> MAX_PAGE_OBJECT_ORDER;
453
454         return bucket_to_size(bucket) > max_smallsize;
455 }
456
457 static unsigned int max_bucket(unsigned int lp_bits)
458 {
459         return (lp_bits - MAX_PAGE_OBJECT_ORDER) * INTER_BUCKET_SPACE;
460 }
461
462 void alloc_init(void *pool, unsigned long poolsize)
463 {
464         struct header *head = pool;
465         struct page_header *ph;
466         unsigned int lp_bits, sp_bits, num_buckets;
467         unsigned long header_size, i;
468
469         if (poolsize < MIN_USEFUL_SIZE) {
470                 tiny_alloc_init(pool, poolsize);
471                 return;
472         }
473
474         /* We rely on page numbers fitting in 16 bit. */
475         BUILD_ASSERT(MAX_SMALL_PAGES < 65536);
476         
477         sp_bits = small_page_bits(poolsize);
478         lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
479
480         num_buckets = max_bucket(lp_bits);
481
482         head = pool;
483         header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);
484
485         memset(head, 0, header_size);
486         for (i = 0; i < num_buckets; i++) {
487                 unsigned long pagesize;
488
489                 if (large_page_bucket(i, sp_bits))
490                         pagesize = 1UL << lp_bits;
491                 else
492                         pagesize = 1UL << sp_bits;
493
494                 head->bs[i].elements_per_page
495                         = elements_per_page(i / INTER_BUCKET_SPACE,
496                                             bucket_to_size(i),
497                                             pagesize);
498         }
499
500         /* They start as all large pages. */
501         memset(head->pagesize, 0xFF, sizeof(head->pagesize));
502         /* FIXME: small pages for last bit? */
503
504         /* Split first page into small pages. */
505         assert(header_size << (1UL << lp_bits));
506         clear_bit(head->pagesize, 0);
507
508         /* Skip over page(s) used by header, add rest to free list */
509         for (i = align_up(header_size, (1 << sp_bits)) >> sp_bits;
510              i < SMALL_PAGES_PER_LARGE_PAGE;
511              i++) {
512                 ph = from_pgnum(head, i, sp_bits);
513                 ph->elements_used = 0;
514                 add_small_page_to_freelist(head, ph, sp_bits);
515         }
516
517         /* Add the rest of the pages as large pages. */
518         i = SMALL_PAGES_PER_LARGE_PAGE;
519         while ((i << sp_bits) + (1 << lp_bits) <= poolsize) {
520                 ph = from_pgnum(head, i, sp_bits);
521                 ph->elements_used = 0;
522                 add_large_page_to_freelist(head, ph, sp_bits);
523                 i += SMALL_PAGES_PER_LARGE_PAGE;
524         }
525 }
526
527 /* A large page worth of small pages are free: delete them from free list. */
528 static void del_large_from_small_free_list(struct header *head,
529                                            struct page_header *ph,
530                                            unsigned int sp_bits)
531 {
532         unsigned long i;
533
534         for (i = 0; i < SMALL_PAGES_PER_LARGE_PAGE; i++) {
535                 del_from_list(head, &head->small_free_list,
536                               (void *)ph + (i << sp_bits),
537                               sp_bits);
538         }
539 }
540
541 static bool all_empty(struct header *head,
542                       unsigned long pgnum,
543                       unsigned sp_bits)
544 {
545         unsigned long i;
546
547         for (i = 0; i < SMALL_PAGES_PER_LARGE_PAGE; i++) {
548                 struct page_header *ph = from_pgnum(head, pgnum + i, sp_bits);
549                 if (ph->elements_used)
550                         return false;
551         }
552         return true;
553 }
554
555 static u16 get_large_page(struct header *head, unsigned long poolsize,
556                           unsigned int sp_bits)
557 {
558         unsigned int lp_bits, i, page;
559
560         lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
561
562         page = pop_from_list(head, &head->large_free_list, sp_bits);
563         if (likely(page))
564                 return page;
565
566         /* Look for small pages to coalesce, after first large page. */
567         for (i = SMALL_PAGES_PER_LARGE_PAGE;
568              i < (poolsize >> lp_bits) << BITS_FROM_SMALL_TO_LARGE_PAGE;
569              i += SMALL_PAGES_PER_LARGE_PAGE) {
570                 /* Already a large page? */
571                 if (test_bit(head->pagesize, i / SMALL_PAGES_PER_LARGE_PAGE))
572                         continue;
573                 if (all_empty(head, i, sp_bits)) {
574                         struct page_header *ph = from_pgnum(head, i, sp_bits);
575                         set_bit(head->pagesize,
576                                 i / SMALL_PAGES_PER_LARGE_PAGE);
577                         del_large_from_small_free_list(head, ph, sp_bits);
578                         add_large_page_to_freelist(head, ph, sp_bits);
579                 }
580         }
581                         
582         return pop_from_list(head, &head->large_free_list, sp_bits);
583 }
584
585 /* Returns small page. */
586 static unsigned long break_up_large_page(struct header *head,
587                                          unsigned int sp_bits,
588                                          u16 lpage)
589 {
590         unsigned int i;
591
592         clear_bit(head->pagesize, lpage >> BITS_FROM_SMALL_TO_LARGE_PAGE);
593
594         for (i = 1; i < SMALL_PAGES_PER_LARGE_PAGE; i++) {
595                 struct page_header *ph = from_pgnum(head, lpage + i, sp_bits);
596                 add_small_page_to_freelist(head, ph, sp_bits);
597         }
598
599         return lpage;
600 }
601
602 static u16 get_small_page(struct header *head, unsigned long poolsize,
603                           unsigned int sp_bits)
604 {
605         u16 ret;
606
607         ret = pop_from_list(head, &head->small_free_list, sp_bits);
608         if (likely(ret))
609                 return ret;
610         ret = get_large_page(head, poolsize, sp_bits);
611         if (likely(ret))
612                 ret = break_up_large_page(head, sp_bits, ret);
613         return ret;
614 }
615
616 void *alloc_get(void *pool, unsigned long poolsize,
617                 unsigned long size, unsigned long align)
618 {
619         struct header *head = pool;
620         unsigned int bucket;
621         unsigned long i;
622         struct bucket_state *bs;
623         struct page_header *ph;
624         unsigned int sp_bits;
625
626         if (poolsize < MIN_USEFUL_SIZE) {
627                 return tiny_alloc_get(pool, poolsize, size, align);
628         }
629
630         size = align_up(size, align);
631         if (unlikely(!size))
632                 size = 1;
633         bucket = size_to_bucket(size);
634
635         sp_bits = small_page_bits(poolsize);
636
637         if (bucket >= max_bucket(sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE)) {
638                 /* FIXME: huge alloc. */
639                 return NULL;
640         }
641
642         bs = &head->bs[bucket];
643
644         if (!bs->page_list) {
645                 struct page_header *ph;
646
647                 if (large_page_bucket(bucket, sp_bits))
648                         bs->page_list = get_large_page(head, poolsize,
649                                                        sp_bits);
650                 else
651                         bs->page_list = get_small_page(head, poolsize,
652                                                        sp_bits);
653                 /* FIXME: Try large-aligned alloc?  Header stuffing? */
654                 if (unlikely(!bs->page_list))
655                         return NULL;
656                 ph = from_pgnum(head, bs->page_list, sp_bits);
657                 ph->bucket = bucket;
658                 ph->elements_used = 0;
659                 ph->next = 0;
660                 memset(ph->used, 0, used_size(bs->elements_per_page));
661         }
662
663         ph = from_pgnum(head, bs->page_list, sp_bits);
664
665         i = find_free_bit(ph->used);
666         set_bit(ph->used, i);
667         ph->elements_used++;
668
669         /* check if this page is now full */
670         if (unlikely(ph->elements_used == bs->elements_per_page)) {
671                 del_from_bucket_list(head, bs, ph, sp_bits);
672                 add_to_bucket_full_list(head, bs, ph, sp_bits);
673         }
674
675         return (char *)ph + page_header_size(ph->bucket / INTER_BUCKET_SPACE,
676                                              bs->elements_per_page)
677                + i * bucket_to_size(bucket);
678 }
679
680 void alloc_free(void *pool, unsigned long poolsize, void *free)
681 {
682         struct header *head = pool;
683         struct bucket_state *bs;
684         unsigned int sp_bits;
685         unsigned long i, pgnum, pgoffset, offset = (char *)free - (char *)pool;
686         bool smallpage;
687         struct page_header *ph;
688
689         if (poolsize < MIN_USEFUL_SIZE) {
690                 return tiny_alloc_free(pool, poolsize, free);
691         }
692         
693         /* Get page header. */
694         sp_bits = small_page_bits(poolsize);
695         pgnum = offset >> sp_bits;
696
697         /* Big page? Round down further. */
698         if (test_bit(head->pagesize, pgnum >> BITS_FROM_SMALL_TO_LARGE_PAGE)) {
699                 smallpage = false;
700                 pgnum &= ~(SMALL_PAGES_PER_LARGE_PAGE - 1);
701         } else
702                 smallpage = true;
703
704         /* Step back to page header. */
705         ph = from_pgnum(head, pgnum, sp_bits);
706         bs = &head->bs[ph->bucket];
707         pgoffset = offset - (pgnum << sp_bits)
708                 - page_header_size(ph->bucket / INTER_BUCKET_SPACE,
709                                    bs->elements_per_page);
710
711         if (unlikely(ph->elements_used == bs->elements_per_page)) {
712                 del_from_bucket_full_list(head, bs, ph, sp_bits);
713                 add_to_bucket_list(head, bs, ph, sp_bits);
714         }
715
716         /* Which element are we? */
717         i = pgoffset / bucket_to_size(ph->bucket);
718         clear_bit(ph->used, i);
719         ph->elements_used--;
720
721         if (unlikely(ph->elements_used == 0)) {
722                 bs = &head->bs[ph->bucket];
723                 del_from_bucket_list(head, bs, ph, sp_bits);
724                 if (smallpage)
725                         add_small_page_to_freelist(head, ph, sp_bits);
726                 else
727                         add_large_page_to_freelist(head, ph, sp_bits);
728         }
729 }
730
731 unsigned long alloc_size(void *pool, unsigned long poolsize, void *p)
732 {
733         struct header *head = pool;
734         unsigned int pgnum, sp_bits;
735         unsigned long offset = (char *)p - (char *)pool;
736         struct page_header *ph;
737
738         if (poolsize < MIN_USEFUL_SIZE)
739                 return tiny_alloc_size(pool, poolsize, p);
740
741         /* Get page header. */
742         sp_bits = small_page_bits(poolsize);
743         pgnum = offset >> sp_bits;
744
745         /* Big page? Round down further. */
746         if (test_bit(head->pagesize, pgnum >> BITS_FROM_SMALL_TO_LARGE_PAGE))
747                 pgnum &= ~(SMALL_PAGES_PER_LARGE_PAGE - 1);
748
749         /* Step back to page header. */
750         ph = from_pgnum(head, pgnum, sp_bits);
751         return bucket_to_size(ph->bucket);
752 }
753
754 /* Useful for gdb breakpoints. */
755 static bool check_fail(void)
756 {
757         return false;
758 }
759
760 static unsigned long count_bits(const unsigned long bitmap[],
761                                 unsigned long limit)
762 {
763         unsigned long i, count = 0;
764
765         while (limit >= BITS_PER_LONG) {
766                 count += popcount(bitmap[0]);
767                 bitmap++;
768                 limit -= BITS_PER_LONG;
769         }
770
771         for (i = 0; i < limit; i++)
772                 if (test_bit(bitmap, i))
773                         count++;
774         return count;
775 }
776
777 static bool out_of_bounds(unsigned long pgnum,
778                           unsigned int sp_bits,
779                           unsigned long pagesize,
780                           unsigned long poolsize)
781 {
782         if (((pgnum << sp_bits) >> sp_bits) != pgnum)
783                 return true;
784
785         if ((pgnum << sp_bits) > poolsize)
786                 return true;
787
788         return ((pgnum << sp_bits) + pagesize > poolsize);
789 }
790
791 static bool check_bucket(struct header *head,
792                          unsigned long poolsize,
793                          unsigned long pages[],
794                          struct bucket_state *bs,
795                          unsigned int bindex)
796 {
797         bool lp_bucket;
798         struct page_header *ph;
799         unsigned long taken, i, prev, pagesize, sp_bits, lp_bits;
800
801         sp_bits = small_page_bits(poolsize);
802         lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
803
804         lp_bucket = large_page_bucket(bindex, sp_bits);
805
806         pagesize = 1UL << (lp_bucket ? lp_bits : sp_bits);
807
808         /* This many elements fit? */
809         taken = page_header_size(bindex / INTER_BUCKET_SPACE,
810                                  bs->elements_per_page);
811         taken += bucket_to_size(bindex) * bs->elements_per_page;
812         if (taken > pagesize)
813                 return check_fail();
814
815         /* One more wouldn't fit? */
816         taken = page_header_size(bindex / INTER_BUCKET_SPACE,
817                                  bs->elements_per_page + 1);
818         taken += bucket_to_size(bindex) * (bs->elements_per_page + 1);
819         if (taken <= pagesize)
820                 return check_fail();
821
822         /* Walk used list. */
823         prev = 0;
824         for (i = bs->page_list; i; i = ph->next) {
825                 /* Bad pointer? */
826                 if (out_of_bounds(i, sp_bits, pagesize, poolsize))
827                         return check_fail();
828                 /* Wrong size page? */
829                 if (!!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE)
830                     != lp_bucket)
831                         return check_fail();
832                 /* Large page not on boundary? */
833                 if (lp_bucket && (i % SMALL_PAGES_PER_LARGE_PAGE) != 0)
834                         return check_fail();
835                 ph = from_pgnum(head, i, sp_bits);
836                 /* Linked list corrupt? */
837                 if (ph->prev != prev)
838                         return check_fail();
839                 /* Already seen this page? */
840                 if (test_bit(pages, i))
841                         return check_fail();
842                 set_bit(pages, i);
843                 /* Empty or full? */
844                 if (ph->elements_used == 0)
845                         return check_fail();
846                 if (ph->elements_used >= bs->elements_per_page)
847                         return check_fail();
848                 /* Used bits don't agree? */
849                 if (ph->elements_used != count_bits(ph->used,
850                                                     bs->elements_per_page))
851                         return check_fail();
852                 /* Wrong bucket? */
853                 if (ph->bucket != bindex)
854                         return check_fail();
855                 prev = i;
856         }
857
858         /* Walk full list. */
859         prev = 0;
860         for (i = bs->full_list; i; i = ph->next) {
861                 /* Bad pointer? */
862                 if (out_of_bounds(i, sp_bits, pagesize, poolsize))
863                         return check_fail();
864                 /* Wrong size page? */
865                 if (!!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE)
866                     != lp_bucket)
867                 /* Large page not on boundary? */
868                 if (lp_bucket && (i % SMALL_PAGES_PER_LARGE_PAGE) != 0)
869                         return check_fail();
870                 ph = from_pgnum(head, i, sp_bits);
871                 /* Linked list corrupt? */
872                 if (ph->prev != prev)
873                         return check_fail();
874                 /* Already seen this page? */
875                 if (test_bit(pages, i))
876                         return check_fail();
877                 set_bit(pages, i);
878                 /* Not full? */
879                 if (ph->elements_used != bs->elements_per_page)
880                         return check_fail();
881                 /* Used bits don't agree? */
882                 if (ph->elements_used != count_bits(ph->used,
883                                                     bs->elements_per_page))
884                         return check_fail();
885                 /* Wrong bucket? */
886                 if (ph->bucket != bindex)
887                         return check_fail();
888                 prev = i;
889         }
890         return true;
891 }
892
893 bool alloc_check(void *pool, unsigned long poolsize)
894 {
895         struct header *head = pool;
896         unsigned long prev, i, lp_bits, sp_bits, header_size, num_buckets;
897         struct page_header *ph;
898         unsigned long pages[MAX_SMALL_PAGES / BITS_PER_LONG] = { 0 };
899
900         if (poolsize < MIN_USEFUL_SIZE)
901                 return tiny_alloc_check(pool, poolsize);
902
903         sp_bits = small_page_bits(poolsize);
904         lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
905
906         num_buckets = max_bucket(lp_bits);
907
908         header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);
909
910         /* First, set all bits taken by header. */
911         for (i = 0; i < header_size; i += (1UL << sp_bits))
912                 set_bit(pages, i >> sp_bits);
913
914         /* Check small page free list. */
915         prev = 0;
916         for (i = head->small_free_list; i; i = ph->next) {
917                 /* Bad pointer? */
918                 if (out_of_bounds(i, sp_bits, 1 << sp_bits, poolsize))
919                         return check_fail();
920                 /* Large page? */
921                 if (test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE))
922                         return check_fail();
923                 ph = from_pgnum(head, i, sp_bits);
924                 /* Linked list corrupt? */
925                 if (ph->prev != prev)
926                         return check_fail();
927                 /* Already seen this page? */
928                 if (test_bit(pages, i))
929                         return check_fail();
930                 set_bit(pages, i);
931                 prev = i;
932         }
933
934         /* Check large page free list. */
935         prev = 0;
936         for (i = head->large_free_list; i; i = ph->next) {
937                 /* Bad pointer? */
938                 if (out_of_bounds(i, sp_bits, 1 << lp_bits, poolsize))
939                         return check_fail();
940                 /* Not large page? */
941                 if (!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE))
942                         return check_fail();
943                 /* Not page boundary? */
944                 if ((i % SMALL_PAGES_PER_LARGE_PAGE) != 0)
945                         return check_fail();
946                 ph = from_pgnum(head, i, sp_bits);
947                 /* Linked list corrupt? */
948                 if (ph->prev != prev)
949                         return check_fail();
950                 /* Already seen this page? */
951                 if (test_bit(pages, i))
952                         return check_fail();
953                 set_bit(pages, i);
954                 prev = i;
955         }
956
957         /* Check the buckets. */
958         for (i = 0; i < max_bucket(lp_bits); i++) {
959                 struct bucket_state *bs = &head->bs[i];
960
961                 if (!check_bucket(head, poolsize, pages, bs, i))
962                         return false;
963         }
964
965         /* Make sure every page accounted for. */
966         for (i = 0; i < poolsize >> sp_bits; i++) {
967                 if (!test_bit(pages, i))
968                         return check_fail();
969                 if (test_bit(head->pagesize,
970                              i >> BITS_FROM_SMALL_TO_LARGE_PAGE)) {
971                         /* Large page, skip rest. */
972                         i += SMALL_PAGES_PER_LARGE_PAGE - 1;
973                 }
974         }
975
976         return true;
977 }
978
979 static unsigned long print_overhead(FILE *out, const char *desc,
980                                     unsigned long bytes,
981                                     unsigned long poolsize)
982 {
983         fprintf(out, "Overhead (%s): %lu bytes (%.3g%%)\n",
984                 desc, bytes, 100.0 * bytes / poolsize);
985         return bytes;
986 }
987
988 static unsigned long count_list(struct header *head,
989                                 u16 pgnum,
990                                 unsigned int sp_bits,
991                                 unsigned long *total_elems)
992 {
993         struct page_header *p;
994         unsigned long ret = 0;
995
996         while (pgnum) {
997                 p = from_pgnum(head, pgnum, sp_bits);
998                 if (total_elems)
999                         (*total_elems) += p->elements_used;
1000                 ret++;
1001                 pgnum = p->next;
1002         }
1003         return ret;
1004 }
1005
1006 static unsigned long visualize_bucket(FILE *out, struct header *head,
1007                                       unsigned int bucket,
1008                                       unsigned long poolsize,
1009                                       unsigned int sp_bits)
1010 {
1011         unsigned long num_full, num_partial, num_pages, page_size,
1012                 elems, hdr_min, hdr_size, elems_per_page, overhead = 0;
1013
1014         elems_per_page = head->bs[bucket].elements_per_page;
1015
1016         /* If we used byte-based bitmaps, we could get pg hdr to: */
1017         hdr_min = sizeof(struct page_header)
1018                 - sizeof(((struct page_header *)0)->used)
1019                 + align_up(elems_per_page, CHAR_BIT) / CHAR_BIT;
1020         hdr_size = page_header_size(bucket / INTER_BUCKET_SPACE,
1021                                     elems_per_page);
1022
1023         elems = 0;
1024         num_full = count_list(head, head->bs[bucket].full_list, sp_bits,
1025                               &elems);
1026         num_partial = count_list(head, head->bs[bucket].page_list, sp_bits,
1027                                  &elems);
1028         num_pages = num_full + num_partial;
1029         if (!num_pages)
1030                 return 0;
1031
1032         fprintf(out, "Bucket %u (%lu bytes):"
1033                 " %lu full, %lu partial = %lu elements\n",
1034                 bucket, bucket_to_size(bucket), num_full, num_partial, elems);
1035         /* Strict requirement of page header size. */
1036         overhead += print_overhead(out, "page headers",
1037                                    hdr_min * num_pages, poolsize);
1038         /* Gap between minimal page header and actual start. */
1039         overhead += print_overhead(out, "page post-header alignments",
1040                                    (hdr_size - hdr_min) * num_pages, poolsize);
1041         /* Between last element and end of page. */
1042         page_size = (1 << sp_bits);
1043         if (large_page_bucket(bucket, sp_bits))
1044                 page_size <<= BITS_FROM_SMALL_TO_LARGE_PAGE;
1045
1046         overhead += print_overhead(out, "page tails",
1047                                    (page_size - (hdr_size
1048                                                  + (elems_per_page
1049                                                     * bucket_to_size(bucket))))
1050                                    * num_pages, poolsize);
1051         return overhead;
1052 }
1053
1054 static void tiny_alloc_visualize(FILE *out, void *pool, unsigned long poolsize)
1055 {
1056 }
1057
1058 void alloc_visualize(FILE *out, void *pool, unsigned long poolsize)
1059 {
1060         struct header *head = pool;
1061         unsigned long i, lp_bits, sp_bits, header_size, num_buckets, count,
1062                 overhead = 0;
1063
1064         fprintf(out, "Pool %p size %lu: (%s allocator)\n", pool, poolsize,
1065                 poolsize < MIN_USEFUL_SIZE ? "tiny" : "standard");
1066
1067         if (poolsize < MIN_USEFUL_SIZE) {
1068                 tiny_alloc_visualize(out, pool, poolsize);
1069                 return;
1070         }
1071         
1072         sp_bits = small_page_bits(poolsize);
1073         lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
1074
1075         num_buckets = max_bucket(lp_bits);
1076         header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);
1077
1078         fprintf(out, "Large page size %lu, small page size %lu.\n",
1079                 1UL << lp_bits, 1UL << sp_bits);
1080         overhead += print_overhead(out, "unused pool tail",
1081                                    poolsize % (1 << lp_bits), poolsize);
1082         fprintf(out, "Main header %lu bytes (%lu small pages).\n",
1083                 header_size, align_up(header_size, 1 << sp_bits) >> sp_bits);
1084         overhead += print_overhead(out, "partial header page",
1085                                    align_up(header_size, 1 << sp_bits)
1086                                    - header_size, poolsize);
1087         /* Total large pages. */
1088         i = count_bits(head->pagesize, poolsize >> lp_bits);
1089         /* Used pages. */
1090         count = i - count_list(head, head->large_free_list, sp_bits, NULL);
1091         fprintf(out, "%lu/%lu large pages used (%.3g%%)\n",
1092                 count, i, count ? 100.0 * count / i : 0.0);
1093
1094         /* Total small pages. */
1095         i = (poolsize >> lp_bits) - i;
1096         /* Used pages */
1097         count = i - count_list(head, head->small_free_list, sp_bits, NULL);
1098         fprintf(out, "%lu/%lu small pages used (%.3g%%)\n",
1099                 count, i, count ? 100.0 * count / i : 0.0);
1100
1101         /* Summary of each bucket. */
1102         fprintf(out, "%lu buckets:\n", num_buckets);
1103         for (i = 0; i < num_buckets; i++)
1104                 overhead += visualize_bucket(out, head, i, poolsize, sp_bits);
1105
1106         print_overhead(out, "total", overhead, poolsize);
1107 }