alloc: first cut of new Tridge-inspired allocator
#include <unistd.h>
#include <stdint.h>
#include <string.h>
#include <limits.h>
#include <assert.h>
#include <stdlib.h>
#include "alloc.h"
#include <ccan/build_assert/build_assert.h>
#include <ccan/likely/likely.h>
#include <ccan/short_types/short_types.h>
#include "config.h"

/*
   Inspired by (and parts taken from) Andrew Tridgell's alloc_mmap:
   http://samba.org/~tridge/junkcode/alloc_mmap/

   Copyright (C) Andrew Tridgell 2007

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#if 0 /* Until we have the tiny allocator working, go down to 1 MB */

/* We divide the pool into this many large pages (nearest power of 2) */
#define MAX_PAGES (1024UL)

/* 32 small pages == 1 large page. */
#define BITS_FROM_SMALL_TO_LARGE_PAGE 5

#else

#define MAX_PAGES (128UL)
#define BITS_FROM_SMALL_TO_LARGE_PAGE 4

#endif

/* Smallest pool size for this scheme: 512-byte small pages.  That's
 * 4/8% overhead for 32/64 bit. */
#define MIN_USEFUL_SIZE (MAX_PAGES << (9 + BITS_FROM_SMALL_TO_LARGE_PAGE))
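
/* Worked example with the values above: 128UL << (9 + 4) == 1 MiB, so
 * pools smaller than 1 MiB fall through to the tiny_alloc_* stubs below. */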

/* Every 4 buckets, we jump up a power of 2. ...8 10 12 14 16 20 24 28 32... */
#define INTER_BUCKET_SPACE 4

/* FIXME: Figure this out properly. */
#define MAX_SIZE (1 << 30)

/* How few objects must fit in a page before we use a larger one? (8) */
#define MAX_PAGE_OBJECT_ORDER   3

#define BITS_PER_LONG (sizeof(long) * CHAR_BIT)

struct bucket_state {
        unsigned long elements_per_page;
        unsigned long page_list;
        unsigned long full_list;
};

struct header {
        /* Bitmap of which pages are large (one bit per large page). */
        unsigned long pagesize[MAX_PAGES / BITS_PER_LONG];

        /* List of unused small/large pages. */
        unsigned long small_free_list;
        unsigned long large_free_list;

        /* Variable-length tail: INTER_BUCKET_SPACE buckets per power of 2. */
        struct bucket_state bs[1];
};

struct page_header {
        unsigned long next, prev;
        u32 elements_used;
        /* FIXME: Pack this in somewhere... */
        u8 bucket;
        unsigned long used[1]; /* One bit per element. */
};
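
/*
 * In memory, a page is laid out as:
 *
 *   | next | prev | elements_used | bucket | used[] bitmap | pad | elem 0 | elem 1 | ...
 *
 * where the pad brings the first element up to the alignment for its
 * bucket; see page_header_size() below.
 */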

/* 2 bits for every byte to allocate. */
static void tiny_alloc_init(void *pool, unsigned long poolsize)
{
/* FIXME */
}

static void *tiny_alloc_get(void *pool, unsigned long poolsize,
                            unsigned long size, unsigned long align)
{
/* FIXME */
        return NULL;
}

static void tiny_alloc_free(void *pool, unsigned long poolsize, void *free)
{
/* FIXME */
}

static unsigned long tiny_alloc_size(void *pool, unsigned long poolsize,
                                      void *p)
{
/* FIXME */
        return 0;
}

static bool tiny_alloc_check(void *pool, unsigned long poolsize)
{
/* FIXME */
        return true;
}

static unsigned int fls(unsigned long val)
{
#if HAVE_BUILTIN_CLZL
        /* This is significantly faster! */
        return val ? sizeof(long) * CHAR_BIT - __builtin_clzl(val) : 0;
#else
        unsigned int r = 32;

        if (!val)
                return 0;
        if (sizeof(long) == sizeof(u64)) {
                if (val >> 16 >> 16) {
                        /* Fold 64-bit longs down to the 32-bit ladder
                           below.  Double shift avoids gcc warning on
                           32-bit: right shift count >= width of type */
                        val = val >> 16 >> 16;
                        r += 32;
                }
        }
        if (!(val & 0xffff0000u)) {
                val <<= 16;
                r -= 16;
        }
        if (!(val & 0xff000000u)) {
                val <<= 8;
                r -= 8;
        }
        if (!(val & 0xf0000000u)) {
                val <<= 4;
                r -= 4;
        }
        if (!(val & 0xc0000000u)) {
                val <<= 2;
                r -= 2;
        }
        if (!(val & 0x80000000u)) {
                val <<= 1;
                r -= 1;
        }
        return r;
#endif
}

/* FIXME: Move to bitops. */
static unsigned int ffsl(unsigned long val)
{
#if HAVE_BUILTIN_FFSL
        /* This is significantly faster! */
        return __builtin_ffsl(val);
#else
        unsigned int r = 1;

        if (!val)
                return 0;
        if (sizeof(long) == sizeof(u64)) {
                if (!(val & 0xffffffff)) {
                        /* Workaround gcc warning on 32-bit:
                           error: right shift count >= width of type */
                        u64 tmp = val;
                        tmp >>= 32;
                        val = tmp;
                        r += 32;
                }
        }
        if (!(val & 0xffff)) {
                val >>= 16;
                r += 16;
        }
        if (!(val & 0xff)) {
                val >>= 8;
                r += 8;
        }
        if (!(val & 0xf)) {
                val >>= 4;
                r += 4;
        }
        if (!(val & 3)) {
                val >>= 2;
                r += 2;
        }
        if (!(val & 1)) {
                val >>= 1;
                r += 1;
        }
        return r;
#endif
}

static unsigned int popcount(unsigned long val)
{
#if HAVE_BUILTIN_POPCOUNTL
        return __builtin_popcountl(val);
#else
        /* Parallel bit count: sum adjacent pairs, then nibbles, bytes, etc.
         * (the shift must double at each step). */
        if (sizeof(long) == sizeof(u64)) {
                u64 v = val;
                v = (v & 0x5555555555555555ULL)
                        + ((v >> 1) & 0x5555555555555555ULL);
                v = (v & 0x3333333333333333ULL)
                        + ((v >> 2) & 0x3333333333333333ULL);
                v = (v & 0x0F0F0F0F0F0F0F0FULL)
                        + ((v >> 4) & 0x0F0F0F0F0F0F0F0FULL);
                v = (v & 0x00FF00FF00FF00FFULL)
                        + ((v >> 8) & 0x00FF00FF00FF00FFULL);
                v = (v & 0x0000FFFF0000FFFFULL)
                        + ((v >> 16) & 0x0000FFFF0000FFFFULL);
                v = (v & 0x00000000FFFFFFFFULL)
                        + ((v >> 32) & 0x00000000FFFFFFFFULL);
                return v;
        }
        val = (val & 0x55555555UL) + ((val >> 1) & 0x55555555UL);
        val = (val & 0x33333333UL) + ((val >> 2) & 0x33333333UL);
        val = (val & 0x0F0F0F0FUL) + ((val >> 4) & 0x0F0F0F0FUL);
        val = (val & 0x00FF00FFUL) + ((val >> 8) & 0x00FF00FFUL);
        val = (val & 0x0000FFFFUL) + ((val >> 16) & 0x0000FFFFUL);
        return val;
#endif
}

/*
 * Every 4 buckets, the size doubles.
 * Between buckets, sizes increase linearly.
 *
 * eg. bucket 40 = 2^10                 = 1024
 *     bucket 41 = 2^10 + 1*2^10/4      = 1024 + 256
 *     bucket 42 = 2^10 + 2*2^10/4      = 1024 + 512
 *     bucket 43 = 2^10 + 3*2^10/4      = 1024 + 768
 *     bucket 44 = 2^11                 = 2048
 *
 * Care is taken to handle low numbered buckets, at cost of overflow.
 */
static unsigned long bucket_to_size(unsigned int bucket)
{
        unsigned long base = 1 << (bucket / INTER_BUCKET_SPACE);
        return base + ((bucket % INTER_BUCKET_SPACE)
                       << (bucket / INTER_BUCKET_SPACE))
                / INTER_BUCKET_SPACE;
}

/*
 * Say size is 10.
 *   fls(size/2) == 3.  1 << 3 == 8, so we're 2 too large, out of a possible
 * 8 too large.  That's 1/4 of the way to the next power of 2 == 1 bucket.
 *
 * We make sure we round up.  Note that this fails on 32 bit at size
 * 1879048193 (around bucket 120).
 */
static unsigned int size_to_bucket(unsigned long size)
{
        unsigned int base = fls(size/2);
        unsigned long overshoot;

        overshoot = size - (1 << base);
        return base * INTER_BUCKET_SPACE
                + ((overshoot * INTER_BUCKET_SPACE + (1 << base)-1) >> base);
}
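
/*
 * Round trip of the example above: size 10 gives base = fls(5) = 3 and
 * overshoot = 2, so bucket = 3*4 + ((2*4 + 7) >> 3) = 13, and
 * bucket_to_size(13) = 8 + (1 << 3)/4 = 10 recovers the size.
 */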

static unsigned int large_page_bits(unsigned long poolsize)
{
        return fls(poolsize / MAX_PAGES / 2);
}

static unsigned long align_up(unsigned long x, unsigned long align)
{
        return (x + align - 1) & ~(align - 1);
}
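
/* Note: align_up() assumes align is a power of 2;
 * eg. align_up(13, 8) == 16 and align_up(16, 8) == 16. */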

static void *from_off(struct header *head, unsigned long off)
{
        return (char *)head + off;
}

static unsigned long to_off(struct header *head, void *p)
{
        return (char *)p - (char *)head;
}

/* Size in bytes of the used[] bitmap needed for num_elements. */
static size_t used_size(unsigned int num_elements)
{
        return ((num_elements + BITS_PER_LONG-1) / BITS_PER_LONG)
                * sizeof(long);
}

/*
 * We always align the first entry to the lower power of 2.
 * eg. the 12-byte bucket gets 8-byte aligned.  The 4096-byte bucket
 * gets 4096-byte aligned.
 */
static unsigned long page_header_size(unsigned int align_bits,
                                      unsigned long num_elements)
{
        unsigned long size;

        size = sizeof(struct page_header)
                - sizeof(((struct page_header *)0)->used)
                + used_size(num_elements);
        return align_up(size, 1 << align_bits);
}
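
/*
 * eg. assuming 64-bit longs and no unusual padding (so the fixed part
 * of struct page_header is 24 bytes): 100 elements need a 16-byte
 * used[] bitmap, hence page_header_size(4, 100) == align_up(24 + 16, 16)
 * == 48.
 */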

static void add_to_list(struct header *head,
                        unsigned long *list, struct page_header *ph)
{
        unsigned long h = *list, offset = to_off(head, ph);

        ph->next = h;
        if (h) {
                struct page_header *prev = from_off(head, h);
                assert(prev->prev == 0);
                prev->prev = offset;
        }
        *list = offset;
        ph->prev = 0;
}
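
/*
 * These lists are doubly linked by pool offset rather than by pointer,
 * with offset 0 (the header itself) doubling as the list terminator:
 * after pushing page P onto an empty list, *list == to_off(head, P)
 * and P->next == P->prev == 0.
 */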

static void del_from_list(struct header *head,
                          unsigned long *list, struct page_header *ph)
{
        /* Front of list? */
        if (ph->prev == 0) {
                *list = ph->next;
        } else {
                struct page_header *prev = from_off(head, ph->prev);
                prev->next = ph->next;
        }
        if (ph->next != 0) {
                struct page_header *next = from_off(head, ph->next);
                next->prev = ph->prev;
        }
}

static unsigned long pop_from_list(struct header *head,
                                   unsigned long *list)
{
        unsigned long h = *list;
        struct page_header *ph = from_off(head, h);

        if (likely(h)) {
                *list = ph->next;
                if (*list) {
                        struct page_header *next = from_off(head, *list);
                        next->prev = 0;
                }
        }
        return h;
}

static void add_small_page_to_freelist(struct header *head,
                                       struct page_header *ph)
{
        add_to_list(head, &head->small_free_list, ph);
}

static void add_large_page_to_freelist(struct header *head,
                                       struct page_header *ph)
{
        add_to_list(head, &head->large_free_list, ph);
}

static void add_to_bucket_list(struct header *head,
                               struct bucket_state *bs,
                               struct page_header *ph)
{
        add_to_list(head, &bs->page_list, ph);
}

static void del_from_bucket_list(struct header *head,
                                 struct bucket_state *bs,
                                 struct page_header *ph)
{
        del_from_list(head, &bs->page_list, ph);
}

static void del_from_bucket_full_list(struct header *head,
                                      struct bucket_state *bs,
                                      struct page_header *ph)
{
        del_from_list(head, &bs->full_list, ph);
}

static void add_to_bucket_full_list(struct header *head,
                                    struct bucket_state *bs,
                                    struct page_header *ph)
{
        add_to_list(head, &bs->full_list, ph);
}

static void clear_bit(unsigned long bitmap[], unsigned int off)
{
        bitmap[off / BITS_PER_LONG] &= ~(1UL << (off % BITS_PER_LONG));
}

static bool test_bit(const unsigned long bitmap[], unsigned int off)
{
        return bitmap[off / BITS_PER_LONG] & (1UL << (off % BITS_PER_LONG));
}

static void set_bit(unsigned long bitmap[], unsigned int off)
{
        bitmap[off / BITS_PER_LONG] |= (1UL << (off % BITS_PER_LONG));
}

/* There must be a bit to be found. */
static unsigned int find_free_bit(const unsigned long bitmap[])
{
        unsigned int i;

        for (i = 0; bitmap[i] == -1UL; i++);
        return (i*BITS_PER_LONG) + ffsl(~bitmap[i]) - 1;
}

/* How many elements can we fit in a page? */
static unsigned long elements_per_page(unsigned long align_bits,
                                       unsigned long esize,
                                       unsigned long psize)
{
        unsigned long num, overhead;

        /* First approximation: no extra room for bitmap. */
        overhead = align_up(sizeof(struct page_header), 1 << align_bits);
        num = (psize - overhead) / esize;

        while (page_header_size(align_bits, num) + esize * num > psize)
                num--;
        return num;
}
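
/*
 * eg. with the 64-bit layout assumed above (sizeof(struct page_header)
 * == 32): a 512-byte small page of 16-byte objects starts from
 * num = (512 - 32)/16 = 30, and page_header_size(4, 30) + 16*30 == 512
 * fits exactly, so we get 30 elements per page.
 */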

static bool large_page_bucket(unsigned int bucket, unsigned long poolsize)
{
        unsigned int sp_bits;
        unsigned long max_smallsize;

        sp_bits = large_page_bits(poolsize) - BITS_FROM_SMALL_TO_LARGE_PAGE;
        /* Note: this doesn't take into account page header. */
        max_smallsize = (1UL << sp_bits) >> MAX_PAGE_OBJECT_ORDER;

        return bucket_to_size(bucket) > max_smallsize;
}

static unsigned int max_bucket(unsigned int lp_bits)
{
        return (lp_bits - MAX_PAGE_OBJECT_ORDER) * INTER_BUCKET_SPACE;
}
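
/*
 * eg. a 1 MiB pool has 8192-byte large pages (lp_bits == 13), so valid
 * buckets are 0..39: sizes up to bucket_to_size(39) == 896 bytes.
 * Anything larger is a huge alloc (still a FIXME in alloc_get).
 */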

void alloc_init(void *pool, unsigned long poolsize)
{
        struct header *head = pool;
        struct page_header *ph;
        unsigned int lp_bits, sp_bits, num_buckets;
        unsigned long header_size, i;

        if (poolsize < MIN_USEFUL_SIZE) {
                tiny_alloc_init(pool, poolsize);
                return;
        }

        lp_bits = large_page_bits(poolsize);
        sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE;

        num_buckets = max_bucket(lp_bits);

        header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);

        memset(head, 0, header_size);
        for (i = 0; i < num_buckets; i++) {
                unsigned long pagesize;

                if (large_page_bucket(i, poolsize))
                        pagesize = 1UL << lp_bits;
                else
                        pagesize = 1UL << sp_bits;

                head->bs[i].elements_per_page
                        = elements_per_page(i / INTER_BUCKET_SPACE,
                                            bucket_to_size(i),
                                            pagesize);
        }

        /* They start as all large pages. */
        memset(head->pagesize, 0xFF, sizeof(head->pagesize));
        /* FIXME: small pages for last bit? */

        /* Split first page into small pages. */
        assert(header_size < (1UL << lp_bits));
        clear_bit(head->pagesize, 0);

        /* Skip over page(s) used by header, add rest to free list */
        for (i = align_up(header_size, (1 << sp_bits)) >> sp_bits;
             i < (1 << BITS_FROM_SMALL_TO_LARGE_PAGE);
             i++) {
                ph = from_off(head, i<<sp_bits);
                ph->elements_used = 0;
                add_small_page_to_freelist(head, ph);
        }

        /* Add the rest of the pages as large pages. */
        i = (1 << lp_bits);
        while (i + (1 << lp_bits) <= poolsize) {
                ph = from_off(head, i);
                ph->elements_used = 0;
                add_large_page_to_freelist(head, ph);
                i += (1 << lp_bits);
        }
}

/* A large page worth of small pages are free: delete them from free list. */
static void del_large_from_small_free_list(struct header *head,
                                           struct page_header *ph,
                                           unsigned int sp_bits)
{
        unsigned long i;

        for (i = 0; i < (1 << BITS_FROM_SMALL_TO_LARGE_PAGE); i++) {
                del_from_list(head, &head->small_free_list,
                              (void *)((char *)ph + (i << sp_bits)));
        }
}

static bool all_empty(struct header *head, unsigned long off, unsigned sp_bits)
{
        unsigned long i;

        for (i = 0; i < (1 << BITS_FROM_SMALL_TO_LARGE_PAGE); i++) {
                struct page_header *ph = from_off(head, off + (i << sp_bits));
                if (ph->elements_used)
                        return false;
        }
        return true;
}

static unsigned long get_large_page(struct header *head,
                                    unsigned long poolsize)
{
        unsigned long lp_bits, sp_bits, i, page;

        page = pop_from_list(head, &head->large_free_list);
        if (likely(page))
                return page;

        /* Look for small pages to coalesce, after first large page. */
        lp_bits = large_page_bits(poolsize);
        sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE;

        for (i = (1 << lp_bits); i < poolsize; i += (1 << lp_bits)) {
                /* Already a large page? */
                if (test_bit(head->pagesize, i >> lp_bits))
                        continue;
                if (all_empty(head, i, sp_bits)) {
                        struct page_header *ph = from_off(head, i);
                        set_bit(head->pagesize, i >> lp_bits);
                        del_large_from_small_free_list(head, ph, sp_bits);
                        add_large_page_to_freelist(head, ph);
                }
        }

        return pop_from_list(head, &head->large_free_list);
}

/* Returns small page. */
static unsigned long break_up_large_page(struct header *head,
                                         unsigned long psize,
                                         unsigned long lpage)
{
        unsigned long lp_bits, sp_bits, i;

        lp_bits = large_page_bits(psize);
        sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE;
        clear_bit(head->pagesize, lpage >> lp_bits);

        for (i = 1; i < (1 << BITS_FROM_SMALL_TO_LARGE_PAGE); i++)
                add_small_page_to_freelist(head,
                                           from_off(head,
                                                    lpage + (i<<sp_bits)));

        return lpage;
}

static unsigned long get_small_page(struct header *head,
                                    unsigned long poolsize)
{
        unsigned long ret;

        ret = pop_from_list(head, &head->small_free_list);
        if (likely(ret))
                return ret;
        ret = get_large_page(head, poolsize);
        if (likely(ret))
                ret = break_up_large_page(head, poolsize, ret);
        return ret;
}

void *alloc_get(void *pool, unsigned long poolsize,
                unsigned long size, unsigned long align)
{
        struct header *head = pool;
        unsigned int bucket;
        unsigned long i;
        struct bucket_state *bs;
        struct page_header *ph;

        if (poolsize < MIN_USEFUL_SIZE) {
                return tiny_alloc_get(pool, poolsize, size, align);
        }

        size = align_up(size, align);
        if (unlikely(!size))
                size = 1;
        bucket = size_to_bucket(size);

        if (bucket >= max_bucket(large_page_bits(poolsize))) {
                /* FIXME: huge alloc. */
                return NULL;
        }

        bs = &head->bs[bucket];

        if (!bs->page_list) {
                if (large_page_bucket(bucket, poolsize))
                        bs->page_list = get_large_page(head, poolsize);
                else
                        bs->page_list = get_small_page(head, poolsize);
                /* FIXME: Try large-aligned alloc?  Header stuffing? */
                if (unlikely(!bs->page_list))
                        return NULL;
                ph = from_off(head, bs->page_list);
                ph->bucket = bucket;
                ph->elements_used = 0;
                ph->next = 0;
                memset(ph->used, 0, used_size(bs->elements_per_page));
        }

        ph = from_off(head, bs->page_list);

        i = find_free_bit(ph->used);
        set_bit(ph->used, i);
        ph->elements_used++;

        /* Check if this page is now full. */
        if (unlikely(ph->elements_used == bs->elements_per_page)) {
                del_from_bucket_list(head, bs, ph);
                add_to_bucket_full_list(head, bs, ph);
        }

        return (char *)ph + page_header_size(ph->bucket / INTER_BUCKET_SPACE,
                                             bs->elements_per_page)
               + i * bucket_to_size(bucket);
}
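
/*
 * eg. for the 512-byte/16-byte-object page worked above (header padded
 * to 32 bytes): element 2 is returned as page + 32 + 2*16 == page + 64,
 * and alloc_free() below recovers the index as (64 - 32) / 16 == 2.
 */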

void alloc_free(void *pool, unsigned long poolsize, void *free)
{
        struct header *head = pool;
        struct bucket_state *bs;
        unsigned int pagebits;
        unsigned long i, pgoffset, offset = (char *)free - (char *)pool;
        bool smallpage;
        struct page_header *ph;

        if (poolsize < MIN_USEFUL_SIZE) {
                tiny_alloc_free(pool, poolsize, free);
                return;
        }

        /* Get page header. */
        pagebits = large_page_bits(poolsize);
        if (!test_bit(head->pagesize, offset >> pagebits)) {
                smallpage = true;
                pagebits -= BITS_FROM_SMALL_TO_LARGE_PAGE;
        } else
                smallpage = false;

        /* Step back to page header. */
        ph = from_off(head, offset & ~((1UL << pagebits) - 1));
        bs = &head->bs[ph->bucket];
        pgoffset = (offset & ((1UL << pagebits) - 1))
                - page_header_size(ph->bucket / INTER_BUCKET_SPACE,
                                   bs->elements_per_page);

        /* A full page is coming back onto the bucket's page list. */
        if (unlikely(ph->elements_used == bs->elements_per_page)) {
                del_from_bucket_full_list(head, bs, ph);
                add_to_bucket_list(head, bs, ph);
        }

        /* Which element are we? */
        i = pgoffset / bucket_to_size(ph->bucket);
        clear_bit(ph->used, i);
        ph->elements_used--;

        if (unlikely(ph->elements_used == 0)) {
                del_from_bucket_list(head, bs, ph);
                if (smallpage)
                        add_small_page_to_freelist(head, ph);
                else
                        add_large_page_to_freelist(head, ph);
        }
}

unsigned long alloc_size(void *pool, unsigned long poolsize, void *p)
{
        struct header *head = pool;
        unsigned int pagebits;
        unsigned long offset = (char *)p - (char *)pool;
        struct page_header *ph;

        if (poolsize < MIN_USEFUL_SIZE)
                return tiny_alloc_size(pool, poolsize, p);

        /* Get page header. */
        pagebits = large_page_bits(poolsize);
        if (!test_bit(head->pagesize, offset >> pagebits))
                pagebits -= BITS_FROM_SMALL_TO_LARGE_PAGE;

        /* Step back to page header. */
        ph = from_off(head, offset & ~((1UL << pagebits) - 1));
        return bucket_to_size(ph->bucket);
}

/* Useful for gdb breakpoints. */
static bool check_fail(void)
{
        return false;
}

static unsigned long count_bits(const unsigned long bitmap[],
                                unsigned long limit)
{
        unsigned long i, count = 0;

        while (limit >= BITS_PER_LONG) {
                count += popcount(bitmap[0]);
                bitmap++;
                limit -= BITS_PER_LONG;
        }

        for (i = 0; i < limit; i++)
                if (test_bit(bitmap, i))
                        count++;
        return count;
}

static bool out_of_bounds(unsigned long off,
                          unsigned long pagesize,
                          unsigned long poolsize)
{
        return (off > poolsize || off + pagesize > poolsize);
}

static bool check_bucket(struct header *head,
                         unsigned long poolsize,
                         unsigned long pages[],
                         struct bucket_state *bs,
                         unsigned int bindex)
{
        bool lp_bucket = large_page_bucket(bindex, poolsize);
        struct page_header *ph;
        unsigned long taken, i, prev, pagesize, sp_bits, lp_bits;

        lp_bits = large_page_bits(poolsize);
        sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE;

        pagesize = 1UL << (lp_bucket ? lp_bits : sp_bits);

        /* This many elements fit? */
        taken = page_header_size(bindex / INTER_BUCKET_SPACE,
                                 bs->elements_per_page);
        taken += bucket_to_size(bindex) * bs->elements_per_page;
        if (taken > pagesize)
                return check_fail();

        /* One more wouldn't fit? */
        taken = page_header_size(bindex / INTER_BUCKET_SPACE,
                                 bs->elements_per_page + 1);
        taken += bucket_to_size(bindex) * (bs->elements_per_page + 1);
        if (taken <= pagesize)
                return check_fail();

        /* Walk used list. */
        prev = 0;
        for (i = bs->page_list; i; i = ph->next) {
                /* Bad pointer? */
                if (out_of_bounds(i, pagesize, poolsize))
                        return check_fail();
                /* Wrong size page? */
                if (!!test_bit(head->pagesize, i >> lp_bits) != lp_bucket)
                        return check_fail();
                /* Not page boundary? */
                if (i % pagesize)
                        return check_fail();
                ph = from_off(head, i);
                /* Linked list corrupt? */
                if (ph->prev != prev)
                        return check_fail();
                /* Already seen this page? */
                if (test_bit(pages, i >> sp_bits))
                        return check_fail();
                set_bit(pages, i >> sp_bits);
                /* Empty or full? */
                if (ph->elements_used == 0)
                        return check_fail();
                if (ph->elements_used >= bs->elements_per_page)
                        return check_fail();
                /* Used bits don't agree? */
                if (ph->elements_used != count_bits(ph->used,
                                                    bs->elements_per_page))
                        return check_fail();
                /* Wrong bucket? */
                if (ph->bucket != bindex)
                        return check_fail();
                prev = i;
        }

        /* Walk full list. */
        prev = 0;
        for (i = bs->full_list; i; i = ph->next) {
                /* Bad pointer? */
                if (out_of_bounds(i, pagesize, poolsize))
                        return check_fail();
                /* Wrong size page? */
                if (!!test_bit(head->pagesize, i >> lp_bits) != lp_bucket)
                        return check_fail();
                /* Not page boundary? */
                if (i % pagesize)
                        return check_fail();
                ph = from_off(head, i);
                /* Linked list corrupt? */
                if (ph->prev != prev)
                        return check_fail();
                /* Already seen this page? */
                if (test_bit(pages, i >> sp_bits))
                        return check_fail();
                set_bit(pages, i >> sp_bits);
                /* Not full? */
                if (ph->elements_used != bs->elements_per_page)
                        return check_fail();
                /* Used bits don't agree? */
                if (ph->elements_used != count_bits(ph->used,
                                                    bs->elements_per_page))
                        return check_fail();
                /* Wrong bucket? */
                if (ph->bucket != bindex)
                        return check_fail();
                prev = i;
        }
        return true;
}

bool alloc_check(void *pool, unsigned long poolsize)
{
        struct header *head = pool;
        unsigned long prev, i, lp_bits, sp_bits, header_size, num_buckets;
        struct page_header *ph;
        unsigned long pages[(MAX_PAGES << BITS_FROM_SMALL_TO_LARGE_PAGE)
                            / BITS_PER_LONG] = { 0 };

        if (poolsize < MIN_USEFUL_SIZE)
                return tiny_alloc_check(pool, poolsize);

        lp_bits = large_page_bits(poolsize);
        sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE;

        num_buckets = max_bucket(lp_bits);

        header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);

        /* First, set all bits taken by header. */
        for (i = 0; i < header_size; i += (1UL << sp_bits))
                set_bit(pages, i >> sp_bits);

        /* Check small page free list. */
        prev = 0;
        for (i = head->small_free_list; i; i = ph->next) {
                /* Bad pointer? */
                if (out_of_bounds(i, 1 << sp_bits, poolsize))
                        return check_fail();
                /* Large page? */
                if (test_bit(head->pagesize, i >> lp_bits))
                        return check_fail();
                /* Not page boundary? */
                if (i % (1 << sp_bits))
                        return check_fail();
                ph = from_off(head, i);
                /* Linked list corrupt? */
                if (ph->prev != prev)
                        return check_fail();
                /* Already seen this page? */
                if (test_bit(pages, i >> sp_bits))
                        return check_fail();
                set_bit(pages, i >> sp_bits);
                prev = i;
        }

        /* Check large page free list. */
        prev = 0;
        for (i = head->large_free_list; i; i = ph->next) {
                /* Bad pointer? */
                if (out_of_bounds(i, 1 << lp_bits, poolsize))
                        return check_fail();
                /* Not large page? */
                if (!test_bit(head->pagesize, i >> lp_bits))
                        return check_fail();
                /* Not page boundary? */
                if (i % (1 << lp_bits))
                        return check_fail();
                ph = from_off(head, i);
                /* Linked list corrupt? */
                if (ph->prev != prev)
                        return check_fail();
                /* Already seen this page? */
                if (test_bit(pages, i >> sp_bits))
                        return check_fail();
                set_bit(pages, i >> sp_bits);
                prev = i;
        }

        /* Check the buckets. */
        for (i = 0; i < num_buckets; i++) {
                struct bucket_state *bs = &head->bs[i];

                if (!check_bucket(head, poolsize, pages, bs, i))
                        return false;
        }

        /* Make sure every page is accounted for. */
        for (i = 0; i < poolsize >> sp_bits; i++) {
                if (!test_bit(pages, i))
                        return check_fail();
                if (test_bit(head->pagesize,
                             i >> BITS_FROM_SMALL_TO_LARGE_PAGE)) {
                        /* Large page, skip rest. */
                        i += (1 << BITS_FROM_SMALL_TO_LARGE_PAGE) - 1;
                }
        }

        return true;
}
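
/*
 * A minimal usage sketch of the public API above (not from the original
 * file; the ALLOC_EXAMPLE guard is hypothetical, just to keep it out of
 * normal builds).  It assumes a plain malloc'd pool, though a mmap'd
 * region works the same way.
 */
#ifdef ALLOC_EXAMPLE
#include <stdio.h>

int main(void)
{
        unsigned long poolsize = MIN_USEFUL_SIZE; /* 1 MiB: big enough for the page allocator */
        void *pool = malloc(poolsize);
        void *p;

        if (!pool)
                return 1;
        alloc_init(pool, poolsize);

        p = alloc_get(pool, poolsize, 100, 16); /* 100 bytes, 16-byte aligned */
        if (p) {
                /* Buckets round up: 100 aligned to 16 is 112, which is
                 * exactly bucket 27's size, so expect 112 here. */
                printf("usable size: %lu\n", alloc_size(pool, poolsize, p));
                alloc_free(pool, poolsize, p);
        }

        assert(alloc_check(pool, poolsize));
        free(pool);
        return 0;
}
#endif /* ALLOC_EXAMPLE */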