X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Falloc%2Falloc.c;h=64475078c0a6811f88b5dcf9861492a117a2ff41;hp=1b011caaf1df45a09f402b7399f98d834af00069;hb=a40b318e7a07a452ae7456053727bd11b2fa49b4;hpb=a7f635002edf6af05ecf8b6b5cc121eaaa588c46 diff --git a/ccan/alloc/alloc.c b/ccan/alloc/alloc.c index 1b011caa..64475078 100644 --- a/ccan/alloc/alloc.c +++ b/ccan/alloc/alloc.c @@ -5,9 +5,13 @@ #include #include #include "alloc.h" +#include "bitops.h" +#include "tiny.h" #include #include +#include #include +#include #include "config.h" /* @@ -31,28 +35,23 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#if 0 /* Until we have the tiny allocator working, go down to 1 MB */ - /* We divide the pool into this many large pages (nearest power of 2) */ -#define MAX_PAGES (1024UL) +#define MAX_LARGE_PAGES (256UL) /* 32 small pages == 1 large page. */ #define BITS_FROM_SMALL_TO_LARGE_PAGE 5 -#else - -#define MAX_PAGES (128UL) -#define BITS_FROM_SMALL_TO_LARGE_PAGE 4 +#define MAX_SMALL_PAGES (MAX_LARGE_PAGES << BITS_FROM_SMALL_TO_LARGE_PAGE) -#endif - -/* Smallest pool size for this scheme: 512-byte small pages. That's - * 4/8% overhead for 32/64 bit. */ -#define MIN_USEFUL_SIZE (MAX_PAGES << (9 + BITS_FROM_SMALL_TO_LARGE_PAGE)) +/* Smallest pool size for this scheme: 128-byte small pages. That's + * 9/13% overhead for 32/64 bit. */ +#define MIN_USEFUL_SIZE (MAX_SMALL_PAGES * 128) /* Every 4 buckets, we jump up a power of 2. ...8 10 12 14 16 20 24 28 32... */ #define INTER_BUCKET_SPACE 4 +#define SMALL_PAGES_PER_LARGE_PAGE (1 << BITS_FROM_SMALL_TO_LARGE_PAGE) + /* FIXME: Figure this out properly. */ #define MAX_SIZE (1 << 30) @@ -62,170 +61,38 @@ #define BITS_PER_LONG (sizeof(long) * CHAR_BIT) struct bucket_state { - unsigned long elements_per_page; - unsigned long page_list; - unsigned long full_list; + u32 elements_per_page; + u16 page_list; + u16 full_list; }; struct header { - /* 1024 bit bitmap of which pages are large. */ - unsigned long pagesize[MAX_PAGES / BITS_PER_LONG]; + /* Bitmap of which pages are large. */ + unsigned long pagesize[MAX_LARGE_PAGES / BITS_PER_LONG]; /* List of unused small/large pages. */ - unsigned long small_free_list; - unsigned long large_free_list; + u16 small_free_list; + u16 large_free_list; + + /* List of huge allocs. */ + unsigned long huge; /* This is less defined: we have two buckets for each power of 2 */ struct bucket_state bs[1]; }; -struct page_header { +struct huge_alloc { unsigned long next, prev; - u32 elements_used; - /* FIXME: Pack this in somewhere... */ - u8 bucket; - unsigned long used[1]; /* One bit per element. */ + unsigned long off, len; }; -/* 2 bit for every byte to allocate. */ -static void tiny_alloc_init(void *pool, unsigned long poolsize) -{ -/* FIXME */ -} - -static void *tiny_alloc_get(void *pool, unsigned long poolsize, - unsigned long size, unsigned long align) -{ -/* FIXME */ - return NULL; -} - -static void tiny_alloc_free(void *pool, unsigned long poolsize, void *free) -{ -/* FIXME */ -} - -static unsigned long tiny_alloc_size(void *pool, unsigned long poolsize, - void *p) -{ -/* FIXME */ - return 0; -} - -static bool tiny_alloc_check(void *pool, unsigned long poolsize) -{ -/* FIXME */ - return true; -} - -static unsigned int fls(unsigned long val) -{ -#if HAVE_BUILTIN_CLZL - /* This is significantly faster! */ - return val ? sizeof(long) * CHAR_BIT - __builtin_clzl(val) : 0; -#else - unsigned int r = 32; - - if (!val) - return 0; - if (!(val & 0xffff0000u)) { - val <<= 16; - r -= 16; - } - if (!(val & 0xff000000u)) { - val <<= 8; - r -= 8; - } - if (!(val & 0xf0000000u)) { - val <<= 4; - r -= 4; - } - if (!(val & 0xc0000000u)) { - val <<= 2; - r -= 2; - } - if (!(val & 0x80000000u)) { - val <<= 1; - r -= 1; - } - return r; -#endif -} - -/* FIXME: Move to bitops. */ -static unsigned int ffsl(unsigned long val) -{ -#if HAVE_BUILTIN_FFSL - /* This is significantly faster! */ - return __builtin_ffsl(val); -#else - unsigned int r = 1; - - if (!val) - return 0; - if (sizeof(long) == sizeof(u64)) { - if (!(val & 0xffffffff)) { - /* Workaround gcc warning on 32-bit: - error: right shift count >= width of type */ - u64 tmp = val; - tmp >>= 32; - val = tmp; - r += 32; - } - } - if (!(val & 0xffff)) { - val >>= 16; - r += 16; - } - if (!(val & 0xff)) { - val >>= 8; - r += 8; - } - if (!(val & 0xf)) { - val >>= 4; - r += 4; - } - if (!(val & 3)) { - val >>= 2; - r += 2; - } - if (!(val & 1)) { - val >>= 1; - r += 1; - } - return r; -#endif -} - -static unsigned int popcount(unsigned long val) -{ -#if HAVE_BUILTIN_POPCOUNTL - return __builtin_popcountl(val); -#else - if (sizeof(long) == sizeof(u64)) { - u64 v = val; - v = (v & 0x5555555555555555ULL) - + ((v >> 1) & 0x5555555555555555ULL); - v = (v & 0x3333333333333333ULL) - + ((v >> 1) & 0x3333333333333333ULL); - v = (v & 0x0F0F0F0F0F0F0F0FULL) - + ((v >> 1) & 0x0F0F0F0F0F0F0F0FULL); - v = (v & 0x00FF00FF00FF00FFULL) - + ((v >> 1) & 0x00FF00FF00FF00FFULL); - v = (v & 0x0000FFFF0000FFFFULL) - + ((v >> 1) & 0x0000FFFF0000FFFFULL); - v = (v & 0x00000000FFFFFFFFULL) - + ((v >> 1) & 0x00000000FFFFFFFFULL); - return v; - } - val = (val & 0x55555555ULL) + ((val >> 1) & 0x55555555ULL); - val = (val & 0x33333333ULL) + ((val >> 1) & 0x33333333ULL); - val = (val & 0x0F0F0F0FULL) + ((val >> 1) & 0x0F0F0F0FULL); - val = (val & 0x00FF00FFULL) + ((val >> 1) & 0x00FF00FFULL); - val = (val & 0x0000FFFFULL) + ((val >> 1) & 0x0000FFFFULL); - return val; -#endif -} +struct page_header { + u16 next, prev; + /* FIXME: We can just count all-0 and all-1 used[] elements. */ + unsigned elements_used : 25; + unsigned bucket : 7; + unsigned long used[1]; /* One bit per element. */ +}; /* * Every 4 buckets, the size doubles. @@ -241,7 +108,7 @@ static unsigned int popcount(unsigned long val) */ static unsigned long bucket_to_size(unsigned int bucket) { - unsigned long base = 1 << (bucket / INTER_BUCKET_SPACE); + unsigned long base = 1UL << (bucket / INTER_BUCKET_SPACE); return base + ((bucket % INTER_BUCKET_SPACE) << (bucket / INTER_BUCKET_SPACE)) / INTER_BUCKET_SPACE; @@ -260,29 +127,26 @@ static unsigned int size_to_bucket(unsigned long size) unsigned int base = fls(size/2); unsigned long overshoot; - overshoot = size - (1 << base); + overshoot = size - (1UL << base); return base * INTER_BUCKET_SPACE - + ((overshoot * INTER_BUCKET_SPACE + (1 << base)-1) >> base); + + ((overshoot * INTER_BUCKET_SPACE + (1UL << base)-1) >> base); } -static unsigned int large_page_bits(unsigned long poolsize) +static unsigned int small_page_bits(unsigned long poolsize) { - return fls(poolsize / MAX_PAGES / 2); + return fls(poolsize / MAX_SMALL_PAGES - 1); } -static unsigned long align_up(unsigned long x, unsigned long align) +static struct page_header *from_pgnum(struct header *head, + unsigned long pgnum, + unsigned sp_bits) { - return (x + align - 1) & ~(align - 1); + return (struct page_header *)((char *)head + (pgnum << sp_bits)); } -static struct page_header *from_off(struct header *head, unsigned long off) +static u16 to_pgnum(struct header *head, void *p, unsigned sp_bits) { - return (struct page_header *)((char *)head + off); -} - -static unsigned long to_off(struct header *head, void *p) -{ - return (char *)p - (char *)head; + return ((char *)p - (char *)head) >> sp_bits; } static size_t used_size(unsigned int num_elements) @@ -303,17 +167,17 @@ static unsigned long page_header_size(unsigned int align_bits, size = sizeof(struct page_header) - sizeof(((struct page_header *)0)->used) + used_size(num_elements); - return align_up(size, 1 << align_bits); + return align_up(size, 1UL << align_bits); } static void add_to_list(struct header *head, - unsigned long *list, struct page_header *ph) + u16 *list, struct page_header *ph, unsigned sp_bits) { - unsigned long h = *list, offset = to_off(head, ph); + unsigned long h = *list, offset = to_pgnum(head, ph, sp_bits); ph->next = h; if (h) { - struct page_header *prev = from_off(head, h); + struct page_header *prev = from_pgnum(head, h, sp_bits); assert(prev->prev == 0); prev->prev = offset; } @@ -322,90 +186,125 @@ static void add_to_list(struct header *head, } static void del_from_list(struct header *head, - unsigned long *list, struct page_header *ph) + u16 *list, struct page_header *ph, unsigned sp_bits) { /* Front of list? */ if (ph->prev == 0) { *list = ph->next; } else { - struct page_header *prev = from_off(head, ph->prev); + struct page_header *prev = from_pgnum(head, ph->prev, sp_bits); prev->next = ph->next; } if (ph->next != 0) { - struct page_header *next = from_off(head, ph->next); + struct page_header *next = from_pgnum(head, ph->next, sp_bits); next->prev = ph->prev; } } -static unsigned long pop_from_list(struct header *head, - unsigned long *list) +static u16 pop_from_list(struct header *head, + u16 *list, + unsigned int sp_bits) { - unsigned long h = *list; - struct page_header *ph = from_off(head, h); + u16 h = *list; + struct page_header *ph = from_pgnum(head, h, sp_bits); if (likely(h)) { *list = ph->next; - if (*list) { - struct page_header *next = from_off(head, *list); - next->prev = 0; - } + if (*list) + from_pgnum(head, *list, sp_bits)->prev = 0; } return h; } +static void add_to_huge_list(struct header *head, struct huge_alloc *ha) +{ + unsigned long h = head->huge; + unsigned long offset = (char *)ha - (char *)head; + + ha->next = h; + if (h) { + struct huge_alloc *prev = (void *)((char *)head + h); + assert(prev->prev == 0); + prev->prev = offset; + } + head->huge = offset; + ha->prev = 0; +} + +static void del_from_huge(struct header *head, struct huge_alloc *ha) +{ + /* Front of list? */ + if (ha->prev == 0) { + head->huge = ha->next; + } else { + struct huge_alloc *prev = (void *)((char *)head + ha->prev); + prev->next = ha->next; + } + if (ha->next != 0) { + struct huge_alloc *next = (void *)((char *)head + ha->next); + next->prev = ha->prev; + } +} + static void add_small_page_to_freelist(struct header *head, - struct page_header *ph) + struct page_header *ph, + unsigned int sp_bits) { - add_to_list(head, &head->small_free_list, ph); + add_to_list(head, &head->small_free_list, ph, sp_bits); } static void add_large_page_to_freelist(struct header *head, - struct page_header *ph) + struct page_header *ph, + unsigned int sp_bits) { - add_to_list(head, &head->large_free_list, ph); + add_to_list(head, &head->large_free_list, ph, sp_bits); } static void add_to_bucket_list(struct header *head, struct bucket_state *bs, - struct page_header *ph) + struct page_header *ph, + unsigned int sp_bits) { - add_to_list(head, &bs->page_list, ph); + add_to_list(head, &bs->page_list, ph, sp_bits); } static void del_from_bucket_list(struct header *head, struct bucket_state *bs, - struct page_header *ph) + struct page_header *ph, + unsigned int sp_bits) { - del_from_list(head, &bs->page_list, ph); + del_from_list(head, &bs->page_list, ph, sp_bits); } static void del_from_bucket_full_list(struct header *head, struct bucket_state *bs, - struct page_header *ph) + struct page_header *ph, + unsigned int sp_bits) { - del_from_list(head, &bs->full_list, ph); + del_from_list(head, &bs->full_list, ph, sp_bits); } static void add_to_bucket_full_list(struct header *head, struct bucket_state *bs, - struct page_header *ph) + struct page_header *ph, + unsigned int sp_bits) { - add_to_list(head, &bs->full_list, ph); + add_to_list(head, &bs->full_list, ph, sp_bits); } static void clear_bit(unsigned long bitmap[], unsigned int off) { - bitmap[off / BITS_PER_LONG] &= ~(1 << (off % BITS_PER_LONG)); + bitmap[off / BITS_PER_LONG] &= ~(1UL << (off % BITS_PER_LONG)); } static bool test_bit(const unsigned long bitmap[], unsigned int off) { - return bitmap[off / BITS_PER_LONG] & (1 << (off % BITS_PER_LONG)); + return bitmap[off / BITS_PER_LONG] & (1UL << (off % BITS_PER_LONG)); } static void set_bit(unsigned long bitmap[], unsigned int off) { - bitmap[off / BITS_PER_LONG] |= (1 << (off % BITS_PER_LONG)); + bitmap[off / BITS_PER_LONG] |= (1UL << (off % BITS_PER_LONG)); } /* There must be a bit to be found. */ @@ -425,7 +324,7 @@ static unsigned long elements_per_page(unsigned long align_bits, unsigned long num, overhead; /* First approximation: no extra room for bitmap. */ - overhead = align_up(sizeof(struct page_header), 1 << align_bits); + overhead = align_up(sizeof(struct page_header), 1UL << align_bits); num = (psize - overhead) / esize; while (page_header_size(align_bits, num) + esize * num > psize) @@ -433,12 +332,10 @@ static unsigned long elements_per_page(unsigned long align_bits, return num; } -static bool large_page_bucket(unsigned int bucket, unsigned long poolsize) +static bool large_page_bucket(unsigned int bucket, unsigned int sp_bits) { - unsigned int sp_bits; unsigned long max_smallsize; - sp_bits = large_page_bits(poolsize) - BITS_FROM_SMALL_TO_LARGE_PAGE; /* Note: this doesn't take into account page header. */ max_smallsize = (1UL << sp_bits) >> MAX_PAGE_OBJECT_ORDER; @@ -462,8 +359,11 @@ void alloc_init(void *pool, unsigned long poolsize) return; } - lp_bits = large_page_bits(poolsize); - sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE; + /* We rely on page numbers fitting in 16 bit. */ + BUILD_ASSERT(MAX_SMALL_PAGES < 65536); + + sp_bits = small_page_bits(poolsize); + lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE; num_buckets = max_bucket(lp_bits); @@ -474,7 +374,7 @@ void alloc_init(void *pool, unsigned long poolsize) for (i = 0; i < num_buckets; i++) { unsigned long pagesize; - if (large_page_bucket(i, poolsize)) + if (large_page_bucket(i, sp_bits)) pagesize = 1UL << lp_bits; else pagesize = 1UL << sp_bits; @@ -490,25 +390,26 @@ void alloc_init(void *pool, unsigned long poolsize) /* FIXME: small pages for last bit? */ /* Split first page into small pages. */ - assert(header_size << (1UL << lp_bits)); + assert(header_size < (1UL << lp_bits)); clear_bit(head->pagesize, 0); /* Skip over page(s) used by header, add rest to free list */ - for (i = align_up(header_size, (1 << sp_bits)) >> sp_bits; - i < (1 << BITS_FROM_SMALL_TO_LARGE_PAGE); + for (i = align_up(header_size, (1UL << sp_bits)) >> sp_bits; + i < SMALL_PAGES_PER_LARGE_PAGE; i++) { - ph = from_off(head, i<elements_used = 0; - add_small_page_to_freelist(head, ph); + add_small_page_to_freelist(head, ph, sp_bits); } /* Add the rest of the pages as large pages. */ - i = (1 << lp_bits); - while (i + (1 << lp_bits) <= poolsize) { - ph = from_off(head, i); + i = SMALL_PAGES_PER_LARGE_PAGE; + while ((i << sp_bits) + (1UL << lp_bits) <= poolsize) { + assert(i < MAX_SMALL_PAGES); + ph = from_pgnum(head, i, sp_bits); ph->elements_used = 0; - add_large_page_to_freelist(head, ph); - i += (1 << lp_bits); + add_large_page_to_freelist(head, ph, sp_bits); + i += SMALL_PAGES_PER_LARGE_PAGE; } } @@ -519,85 +420,286 @@ static void del_large_from_small_free_list(struct header *head, { unsigned long i; - for (i = 0; i < (1 << BITS_FROM_SMALL_TO_LARGE_PAGE); i++) { + for (i = 0; i < SMALL_PAGES_PER_LARGE_PAGE; i++) { del_from_list(head, &head->small_free_list, - (void *)ph + (i << sp_bits)); + (void *)ph + (i << sp_bits), + sp_bits); } } -static bool all_empty(struct header *head, unsigned long off, unsigned sp_bits) +static bool all_empty(struct header *head, + unsigned long pgnum, + unsigned sp_bits) { unsigned long i; - for (i = 0; i < (1 << BITS_FROM_SMALL_TO_LARGE_PAGE); i++) { - struct page_header *ph = from_off(head, off + (i << sp_bits)); + for (i = 0; i < SMALL_PAGES_PER_LARGE_PAGE; i++) { + struct page_header *ph = from_pgnum(head, pgnum + i, sp_bits); if (ph->elements_used) return false; } return true; } -static unsigned long get_large_page(struct header *head, - unsigned long poolsize) +static void recombine_small_pages(struct header *head, unsigned long poolsize, + unsigned int sp_bits) { - unsigned long lp_bits, sp_bits, i, page; - - page = pop_from_list(head, &head->large_free_list); - if (likely(page)) - return page; + unsigned long i; + unsigned int lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE; /* Look for small pages to coalesce, after first large page. */ - lp_bits = large_page_bits(poolsize); - sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE; - - for (i = (1 << lp_bits); i < poolsize; i += (1 << lp_bits)) { + for (i = SMALL_PAGES_PER_LARGE_PAGE; + i < (poolsize >> lp_bits) << BITS_FROM_SMALL_TO_LARGE_PAGE; + i += SMALL_PAGES_PER_LARGE_PAGE) { /* Already a large page? */ - if (test_bit(head->pagesize, i >> lp_bits)) + if (test_bit(head->pagesize, i / SMALL_PAGES_PER_LARGE_PAGE)) continue; if (all_empty(head, i, sp_bits)) { - struct page_header *ph = from_off(head, i); - set_bit(head->pagesize, i >> lp_bits); + struct page_header *ph = from_pgnum(head, i, sp_bits); + set_bit(head->pagesize, + i / SMALL_PAGES_PER_LARGE_PAGE); del_large_from_small_free_list(head, ph, sp_bits); - add_large_page_to_freelist(head, ph); + add_large_page_to_freelist(head, ph, sp_bits); } } - - return pop_from_list(head, &head->large_free_list); +} + +static u16 get_large_page(struct header *head, unsigned long poolsize, + unsigned int sp_bits) +{ + unsigned int lp_bits, page; + + lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE; + + page = pop_from_list(head, &head->large_free_list, sp_bits); + if (likely(page)) + return page; + + recombine_small_pages(head, poolsize, sp_bits); + + return pop_from_list(head, &head->large_free_list, sp_bits); } /* Returns small page. */ static unsigned long break_up_large_page(struct header *head, - unsigned long psize, - unsigned long lpage) + unsigned int sp_bits, + u16 lpage) { - unsigned long lp_bits, sp_bits, i; + unsigned int i; - lp_bits = large_page_bits(psize); - sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE; - clear_bit(head->pagesize, lpage >> lp_bits); + clear_bit(head->pagesize, lpage >> BITS_FROM_SMALL_TO_LARGE_PAGE); - for (i = 1; i < (1 << BITS_FROM_SMALL_TO_LARGE_PAGE); i++) - add_small_page_to_freelist(head, - from_off(head, - lpage + (i<elements_used = 0; + add_small_page_to_freelist(head, ph, sp_bits); + } return lpage; } -static unsigned long get_small_page(struct header *head, - unsigned long poolsize) +static u16 get_small_page(struct header *head, unsigned long poolsize, + unsigned int sp_bits) { - unsigned long ret; + u16 ret; - ret = pop_from_list(head, &head->small_free_list); + ret = pop_from_list(head, &head->small_free_list, sp_bits); if (likely(ret)) return ret; - ret = get_large_page(head, poolsize); + ret = get_large_page(head, poolsize, sp_bits); if (likely(ret)) - ret = break_up_large_page(head, poolsize, ret); + ret = break_up_large_page(head, sp_bits, ret); return ret; } +static bool huge_allocated(struct header *head, unsigned long offset) +{ + unsigned long i; + struct huge_alloc *ha; + + for (i = head->huge; i; i = ha->next) { + ha = (void *)((char *)head + i); + if (ha->off <= offset && ha->off + ha->len > offset) + return true; + } + return false; +} + +/* They want something really big. Aim for contiguous pages (slow). */ +static COLD void *huge_alloc(void *pool, unsigned long poolsize, + unsigned long size, unsigned long align) +{ + struct header *head = pool; + struct huge_alloc *ha; + unsigned long i, sp_bits, lp_bits, num, header_size; + + sp_bits = small_page_bits(poolsize); + lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE; + + /* Allocate tracking structure optimistically. */ + ha = alloc_get(pool, poolsize, sizeof(*ha), ALIGNOF(*ha)); + if (!ha) + return NULL; + + /* First search for contiguous small pages... */ + header_size = sizeof(*head) + sizeof(head->bs) * (max_bucket(lp_bits)-1); + + num = 0; + for (i = (header_size + (1UL << sp_bits) - 1) >> sp_bits; + i << sp_bits < poolsize; + i++) { + struct page_header *pg; + unsigned long off = (i << sp_bits); + + /* Skip over large pages. */ + if (test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE)) { + i += (1UL << BITS_FROM_SMALL_TO_LARGE_PAGE)-1; + continue; + } + + /* Does this page meet alignment requirements? */ + if (!num && off % align != 0) + continue; + + /* FIXME: This makes us O(n^2). */ + if (huge_allocated(head, off)) { + num = 0; + continue; + } + + pg = (struct page_header *)((char *)head + off); + if (pg->elements_used) { + num = 0; + continue; + } + + num++; + if (num << sp_bits >= size) { + unsigned long pgnum; + + /* Remove from free list. */ + for (pgnum = i; pgnum > i - num; pgnum--) { + pg = from_pgnum(head, pgnum, sp_bits); + del_from_list(head, + &head->small_free_list, + pg, sp_bits); + } + ha->off = (i - num + 1) << sp_bits; + ha->len = num << sp_bits; + goto done; + } + } + + /* Now search for large pages... */ + recombine_small_pages(head, poolsize, sp_bits); + + num = 0; + for (i = (header_size + (1UL << lp_bits) - 1) >> lp_bits; + (i << lp_bits) < poolsize; i++) { + struct page_header *pg; + unsigned long off = (i << lp_bits); + + /* Ignore small pages. */ + if (!test_bit(head->pagesize, i)) + continue; + + /* Does this page meet alignment requirements? */ + if (!num && off % align != 0) + continue; + + /* FIXME: This makes us O(n^2). */ + if (huge_allocated(head, off)) { + num = 0; + continue; + } + + pg = (struct page_header *)((char *)head + off); + if (pg->elements_used) { + num = 0; + continue; + } + + num++; + if (num << lp_bits >= size) { + unsigned long pgnum; + + /* Remove from free list. */ + for (pgnum = i; pgnum > i - num; pgnum--) { + pg = from_pgnum(head, pgnum, lp_bits); + del_from_list(head, + &head->large_free_list, + pg, sp_bits); + } + ha->off = (i - num + 1) << lp_bits; + ha->len = num << lp_bits; + goto done; + } + } + + /* Unable to satisfy: free huge alloc structure. */ + alloc_free(pool, poolsize, ha); + return NULL; + +done: + add_to_huge_list(pool, ha); + return (char *)pool + ha->off; +} + +static COLD void +huge_free(struct header *head, unsigned long poolsize, void *free) +{ + unsigned long i, off, pgnum, free_off = (char *)free - (char *)head; + unsigned int sp_bits, lp_bits; + struct huge_alloc *ha; + + for (i = head->huge; i; i = ha->next) { + ha = (void *)((char *)head + i); + if (free_off == ha->off) + break; + } + assert(i); + + /* Free up all the pages, delete and free ha */ + sp_bits = small_page_bits(poolsize); + lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE; + pgnum = free_off >> sp_bits; + + if (test_bit(head->pagesize, pgnum >> BITS_FROM_SMALL_TO_LARGE_PAGE)) { + for (off = ha->off; + off < ha->off + ha->len; + off += 1UL << lp_bits) { + add_large_page_to_freelist(head, + (void *)((char *)head + off), + sp_bits); + } + } else { + for (off = ha->off; + off < ha->off + ha->len; + off += 1UL << sp_bits) { + add_small_page_to_freelist(head, + (void *)((char *)head + off), + sp_bits); + } + } + del_from_huge(head, ha); + alloc_free(head, poolsize, ha); +} + +static COLD unsigned long huge_size(struct header *head, void *p) +{ + unsigned long i, off = (char *)p - (char *)head; + struct huge_alloc *ha; + + for (i = head->huge; i; i = ha->next) { + ha = (void *)((char *)head + i); + if (off == ha->off) { + return ha->len; + } + } + abort(); +} + void *alloc_get(void *pool, unsigned long poolsize, unsigned long size, unsigned long align) { @@ -606,6 +708,7 @@ void *alloc_get(void *pool, unsigned long poolsize, unsigned long i; struct bucket_state *bs; struct page_header *ph; + unsigned int sp_bits; if (poolsize < MIN_USEFUL_SIZE) { return tiny_alloc_get(pool, poolsize, size, align); @@ -616,9 +719,10 @@ void *alloc_get(void *pool, unsigned long poolsize, size = 1; bucket = size_to_bucket(size); - if (bucket >= max_bucket(large_page_bits(poolsize))) { - /* FIXME: huge alloc. */ - return NULL; + sp_bits = small_page_bits(poolsize); + + if (bucket >= max_bucket(sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE)) { + return huge_alloc(pool, poolsize, size, align); } bs = &head->bs[bucket]; @@ -626,21 +730,23 @@ void *alloc_get(void *pool, unsigned long poolsize, if (!bs->page_list) { struct page_header *ph; - if (large_page_bucket(bucket, poolsize)) - bs->page_list = get_large_page(head, poolsize); + if (large_page_bucket(bucket, sp_bits)) + bs->page_list = get_large_page(head, poolsize, + sp_bits); else - bs->page_list = get_small_page(head, poolsize); + bs->page_list = get_small_page(head, poolsize, + sp_bits); /* FIXME: Try large-aligned alloc? Header stuffing? */ if (unlikely(!bs->page_list)) return NULL; - ph = from_off(head, bs->page_list); + ph = from_pgnum(head, bs->page_list, sp_bits); ph->bucket = bucket; ph->elements_used = 0; ph->next = 0; memset(ph->used, 0, used_size(bs->elements_per_page)); } - ph = from_off(head, bs->page_list); + ph = from_pgnum(head, bs->page_list, sp_bits); i = find_free_bit(ph->used); set_bit(ph->used, i); @@ -648,8 +754,8 @@ void *alloc_get(void *pool, unsigned long poolsize, /* check if this page is now full */ if (unlikely(ph->elements_used == bs->elements_per_page)) { - del_from_bucket_list(head, bs, ph); - add_to_bucket_full_list(head, bs, ph); + del_from_bucket_list(head, bs, ph, sp_bits); + add_to_bucket_full_list(head, bs, ph, sp_bits); } return (char *)ph + page_header_size(ph->bucket / INTER_BUCKET_SPACE, @@ -661,33 +767,41 @@ void alloc_free(void *pool, unsigned long poolsize, void *free) { struct header *head = pool; struct bucket_state *bs; - unsigned int pagebits; - unsigned long i, pgoffset, offset = (char *)free - (char *)pool; + unsigned int sp_bits; + unsigned long i, pgnum, pgoffset, offset = (char *)free - (char *)pool; bool smallpage; struct page_header *ph; if (poolsize < MIN_USEFUL_SIZE) { return tiny_alloc_free(pool, poolsize, free); } - + /* Get page header. */ - pagebits = large_page_bits(poolsize); - if (!test_bit(head->pagesize, offset >> pagebits)) { - smallpage = true; - pagebits -= BITS_FROM_SMALL_TO_LARGE_PAGE; - } else + sp_bits = small_page_bits(poolsize); + pgnum = offset >> sp_bits; + + /* Big page? Round down further. */ + if (test_bit(head->pagesize, pgnum >> BITS_FROM_SMALL_TO_LARGE_PAGE)) { smallpage = false; + pgnum &= ~(SMALL_PAGES_PER_LARGE_PAGE - 1); + } else + smallpage = true; /* Step back to page header. */ - ph = from_off(head, offset & ~((1UL << pagebits) - 1)); + ph = from_pgnum(head, pgnum, sp_bits); + if ((void *)ph == free) { + huge_free(head, poolsize, free); + return; + } + bs = &head->bs[ph->bucket]; - pgoffset = (offset & ((1UL << pagebits) - 1)) + pgoffset = offset - (pgnum << sp_bits) - page_header_size(ph->bucket / INTER_BUCKET_SPACE, bs->elements_per_page); if (unlikely(ph->elements_used == bs->elements_per_page)) { - del_from_bucket_full_list(head, bs, ph); - add_to_bucket_list(head, bs, ph); + del_from_bucket_full_list(head, bs, ph, sp_bits); + add_to_bucket_list(head, bs, ph, sp_bits); } /* Which element are we? */ @@ -697,18 +811,18 @@ void alloc_free(void *pool, unsigned long poolsize, void *free) if (unlikely(ph->elements_used == 0)) { bs = &head->bs[ph->bucket]; - del_from_bucket_list(head, bs, ph); + del_from_bucket_list(head, bs, ph, sp_bits); if (smallpage) - add_small_page_to_freelist(head, ph); + add_small_page_to_freelist(head, ph, sp_bits); else - add_large_page_to_freelist(head, ph); + add_large_page_to_freelist(head, ph, sp_bits); } } unsigned long alloc_size(void *pool, unsigned long poolsize, void *p) { struct header *head = pool; - unsigned int pagebits; + unsigned int pgnum, sp_bits; unsigned long offset = (char *)p - (char *)pool; struct page_header *ph; @@ -716,12 +830,18 @@ unsigned long alloc_size(void *pool, unsigned long poolsize, void *p) return tiny_alloc_size(pool, poolsize, p); /* Get page header. */ - pagebits = large_page_bits(poolsize); - if (!test_bit(head->pagesize, offset >> pagebits)) - pagebits -= BITS_FROM_SMALL_TO_LARGE_PAGE; + sp_bits = small_page_bits(poolsize); + pgnum = offset >> sp_bits; + + /* Big page? Round down further. */ + if (test_bit(head->pagesize, pgnum >> BITS_FROM_SMALL_TO_LARGE_PAGE)) + pgnum &= ~(SMALL_PAGES_PER_LARGE_PAGE - 1); /* Step back to page header. */ - ph = from_off(head, offset & ~((1UL << pagebits) - 1)); + ph = from_pgnum(head, pgnum, sp_bits); + if ((void *)ph == p) + return huge_size(head, p); + return bucket_to_size(ph->bucket); } @@ -748,11 +868,18 @@ static unsigned long count_bits(const unsigned long bitmap[], return count; } -static bool out_of_bounds(unsigned long off, +static bool out_of_bounds(unsigned long pgnum, + unsigned int sp_bits, unsigned long pagesize, unsigned long poolsize) { - return (off > poolsize || off + pagesize > poolsize); + if (((pgnum << sp_bits) >> sp_bits) != pgnum) + return true; + + if ((pgnum << sp_bits) > poolsize) + return true; + + return ((pgnum << sp_bits) + pagesize > poolsize); } static bool check_bucket(struct header *head, @@ -761,12 +888,14 @@ static bool check_bucket(struct header *head, struct bucket_state *bs, unsigned int bindex) { - bool lp_bucket = large_page_bucket(bindex, poolsize); + bool lp_bucket; struct page_header *ph; unsigned long taken, i, prev, pagesize, sp_bits, lp_bits; - lp_bits = large_page_bits(poolsize); - sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE; + sp_bits = small_page_bits(poolsize); + lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE; + + lp_bucket = large_page_bucket(bindex, sp_bits); pagesize = 1UL << (lp_bucket ? lp_bits : sp_bits); @@ -788,22 +917,23 @@ static bool check_bucket(struct header *head, prev = 0; for (i = bs->page_list; i; i = ph->next) { /* Bad pointer? */ - if (out_of_bounds(i, pagesize, poolsize)) + if (out_of_bounds(i, sp_bits, pagesize, poolsize)) return check_fail(); /* Wrong size page? */ - if (!!test_bit(head->pagesize, i >> lp_bits) != lp_bucket) + if (!!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE) + != lp_bucket) return check_fail(); - /* Not page boundary? */ - if (i % pagesize) + /* Large page not on boundary? */ + if (lp_bucket && (i % SMALL_PAGES_PER_LARGE_PAGE) != 0) return check_fail(); - ph = from_off(head, i); + ph = from_pgnum(head, i, sp_bits); /* Linked list corrupt? */ if (ph->prev != prev) return check_fail(); /* Already seen this page? */ - if (test_bit(pages, i >> sp_bits)) + if (test_bit(pages, i)) return check_fail(); - set_bit(pages, i >> sp_bits); + set_bit(pages, i); /* Empty or full? */ if (ph->elements_used == 0) return check_fail(); @@ -823,22 +953,22 @@ static bool check_bucket(struct header *head, prev = 0; for (i = bs->full_list; i; i = ph->next) { /* Bad pointer? */ - if (out_of_bounds(i, pagesize, poolsize)) + if (out_of_bounds(i, sp_bits, pagesize, poolsize)) return check_fail(); /* Wrong size page? */ - if (!!test_bit(head->pagesize, i >> lp_bits) != lp_bucket) - return check_fail(); - /* Not page boundary? */ - if (i % pagesize) + if (!!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE) + != lp_bucket) + /* Large page not on boundary? */ + if (lp_bucket && (i % SMALL_PAGES_PER_LARGE_PAGE) != 0) return check_fail(); - ph = from_off(head, i); + ph = from_pgnum(head, i, sp_bits); /* Linked list corrupt? */ if (ph->prev != prev) return check_fail(); /* Already seen this page? */ - if (test_bit(pages, i >> sp_bits)) + if (test_bit(pages, i)) return check_fail(); - set_bit(pages, i >> sp_bits); + set_bit(pages, i); /* Not full? */ if (ph->elements_used != bs->elements_per_page) return check_fail(); @@ -859,14 +989,14 @@ bool alloc_check(void *pool, unsigned long poolsize) struct header *head = pool; unsigned long prev, i, lp_bits, sp_bits, header_size, num_buckets; struct page_header *ph; - unsigned long pages[(MAX_PAGES << BITS_FROM_SMALL_TO_LARGE_PAGE) - / BITS_PER_LONG] = { 0 }; + struct huge_alloc *ha; + unsigned long pages[MAX_SMALL_PAGES / BITS_PER_LONG] = { 0 }; if (poolsize < MIN_USEFUL_SIZE) return tiny_alloc_check(pool, poolsize); - lp_bits = large_page_bits(poolsize); - sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE; + sp_bits = small_page_bits(poolsize); + lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE; num_buckets = max_bucket(lp_bits); @@ -880,22 +1010,19 @@ bool alloc_check(void *pool, unsigned long poolsize) prev = 0; for (i = head->small_free_list; i; i = ph->next) { /* Bad pointer? */ - if (out_of_bounds(i, 1 << sp_bits, poolsize)) + if (out_of_bounds(i, sp_bits, 1UL << sp_bits, poolsize)) return check_fail(); /* Large page? */ - if (test_bit(head->pagesize, i >> lp_bits)) - return check_fail(); - /* Not page boundary? */ - if (i % (1 << sp_bits)) + if (test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE)) return check_fail(); - ph = from_off(head, i); + ph = from_pgnum(head, i, sp_bits); /* Linked list corrupt? */ if (ph->prev != prev) return check_fail(); /* Already seen this page? */ - if (test_bit(pages, i >> sp_bits)) + if (test_bit(pages, i)) return check_fail(); - set_bit(pages, i >> sp_bits); + set_bit(pages, i); prev = i; } @@ -903,22 +1030,22 @@ bool alloc_check(void *pool, unsigned long poolsize) prev = 0; for (i = head->large_free_list; i; i = ph->next) { /* Bad pointer? */ - if (out_of_bounds(i, 1 << lp_bits, poolsize)) + if (out_of_bounds(i, sp_bits, 1UL << lp_bits, poolsize)) return check_fail(); /* Not large page? */ - if (!test_bit(head->pagesize, i >> lp_bits)) + if (!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE)) return check_fail(); /* Not page boundary? */ - if (i % (1 << lp_bits)) + if ((i % SMALL_PAGES_PER_LARGE_PAGE) != 0) return check_fail(); - ph = from_off(head, i); + ph = from_pgnum(head, i, sp_bits); /* Linked list corrupt? */ if (ph->prev != prev) return check_fail(); /* Already seen this page? */ - if (test_bit(pages, i >> sp_bits)) + if (test_bit(pages, i)) return check_fail(); - set_bit(pages, i >> sp_bits); + set_bit(pages, i); prev = i; } @@ -930,6 +1057,46 @@ bool alloc_check(void *pool, unsigned long poolsize) return false; } + /* Check the huge alloc list. */ + prev = 0; + for (i = head->huge; i; i = ha->next) { + unsigned long pgbits, j; + + /* Bad pointer? */ + if (i >= poolsize || i + sizeof(*ha) > poolsize) + return check_fail(); + ha = (void *)((char *)head + i); + + /* Check contents of ha. */ + if (ha->off > poolsize || ha->off + ha->len > poolsize) + return check_fail(); + + /* Large or small page? */ + pgbits = test_bit(head->pagesize, ha->off >> lp_bits) + ? lp_bits : sp_bits; + + /* Not page boundary? */ + if ((ha->off % (1UL << pgbits)) != 0) + return check_fail(); + + /* Not page length? */ + if ((ha->len % (1UL << pgbits)) != 0) + return check_fail(); + + /* Linked list corrupt? */ + if (ha->prev != prev) + return check_fail(); + + for (j = ha->off; j < ha->off + ha->len; j += (1UL<> sp_bits)) + return check_fail(); + set_bit(pages, j >> sp_bits); + } + + prev = i; + } + /* Make sure every page accounted for. */ for (i = 0; i < poolsize >> sp_bits; i++) { if (!test_bit(pages, i)) @@ -937,7 +1104,7 @@ bool alloc_check(void *pool, unsigned long poolsize) if (test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE)) { /* Large page, skip rest. */ - i += (1 << BITS_FROM_SMALL_TO_LARGE_PAGE) - 1; + i += SMALL_PAGES_PER_LARGE_PAGE - 1; } } @@ -954,25 +1121,27 @@ static unsigned long print_overhead(FILE *out, const char *desc, } static unsigned long count_list(struct header *head, - unsigned long off, + u16 pgnum, + unsigned int sp_bits, unsigned long *total_elems) { struct page_header *p; unsigned long ret = 0; - while (off) { - p = from_off(head, off); + while (pgnum) { + p = from_pgnum(head, pgnum, sp_bits); if (total_elems) (*total_elems) += p->elements_used; ret++; - off = p->next; + pgnum = p->next; } return ret; } static unsigned long visualize_bucket(FILE *out, struct header *head, unsigned int bucket, - unsigned long poolsize) + unsigned long poolsize, + unsigned int sp_bits) { unsigned long num_full, num_partial, num_pages, page_size, elems, hdr_min, hdr_size, elems_per_page, overhead = 0; @@ -987,8 +1156,10 @@ static unsigned long visualize_bucket(FILE *out, struct header *head, elems_per_page); elems = 0; - num_full = count_list(head, head->bs[bucket].full_list, &elems); - num_partial = count_list(head, head->bs[bucket].page_list, &elems); + num_full = count_list(head, head->bs[bucket].full_list, sp_bits, + &elems); + num_partial = count_list(head, head->bs[bucket].page_list, sp_bits, + &elems); num_pages = num_full + num_partial; if (!num_pages) return 0; @@ -1003,9 +1174,9 @@ static unsigned long visualize_bucket(FILE *out, struct header *head, overhead += print_overhead(out, "page post-header alignments", (hdr_size - hdr_min) * num_pages, poolsize); /* Between last element and end of page. */ - page_size = 1UL << large_page_bits(poolsize); - if (!large_page_bucket(bucket, poolsize)) - page_size >>= BITS_FROM_SMALL_TO_LARGE_PAGE; + page_size = (1UL << sp_bits); + if (large_page_bucket(bucket, sp_bits)) + page_size <<= BITS_FROM_SMALL_TO_LARGE_PAGE; overhead += print_overhead(out, "page tails", (page_size - (hdr_size @@ -1015,10 +1186,6 @@ static unsigned long visualize_bucket(FILE *out, struct header *head, return overhead; } -static void tiny_alloc_visualize(FILE *out, void *pool, unsigned long poolsize) -{ -} - void alloc_visualize(FILE *out, void *pool, unsigned long poolsize) { struct header *head = pool; @@ -1033,8 +1200,8 @@ void alloc_visualize(FILE *out, void *pool, unsigned long poolsize) return; } - lp_bits = large_page_bits(poolsize); - sp_bits = lp_bits - BITS_FROM_SMALL_TO_LARGE_PAGE; + sp_bits = small_page_bits(poolsize); + lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE; num_buckets = max_bucket(lp_bits); header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1); @@ -1042,30 +1209,30 @@ void alloc_visualize(FILE *out, void *pool, unsigned long poolsize) fprintf(out, "Large page size %lu, small page size %lu.\n", 1UL << lp_bits, 1UL << sp_bits); overhead += print_overhead(out, "unused pool tail", - poolsize % (1 << lp_bits), poolsize); + poolsize % (1UL << lp_bits), poolsize); fprintf(out, "Main header %lu bytes (%lu small pages).\n", - header_size, align_up(header_size, 1 << sp_bits) >> sp_bits); + header_size, align_up(header_size, 1UL << sp_bits) >> sp_bits); overhead += print_overhead(out, "partial header page", - align_up(header_size, 1 << sp_bits) + align_up(header_size, 1UL << sp_bits) - header_size, poolsize); /* Total large pages. */ i = count_bits(head->pagesize, poolsize >> lp_bits); /* Used pages. */ - count = i - count_list(head, head->large_free_list, NULL); + count = i - count_list(head, head->large_free_list, sp_bits, NULL); fprintf(out, "%lu/%lu large pages used (%.3g%%)\n", count, i, count ? 100.0 * count / i : 0.0); /* Total small pages. */ - i = (poolsize >> lp_bits) - i; + i = ((poolsize >> lp_bits) - i) << BITS_FROM_SMALL_TO_LARGE_PAGE; /* Used pages */ - count = i - count_list(head, head->small_free_list, NULL); + count = i - count_list(head, head->small_free_list, sp_bits, NULL); fprintf(out, "%lu/%lu small pages used (%.3g%%)\n", count, i, count ? 100.0 * count / i : 0.0); /* Summary of each bucket. */ fprintf(out, "%lu buckets:\n", num_buckets); for (i = 0; i < num_buckets; i++) - overhead += visualize_bucket(out, head, i, poolsize); + overhead += visualize_bucket(out, head, i, poolsize, sp_bits); print_overhead(out, "total", overhead, poolsize); }