X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Falloc%2Falloc.c;h=64475078c0a6811f88b5dcf9861492a117a2ff41;hp=d5accb40a581308b60ca76e233ee73968bcf9a2b;hb=a40b318e7a07a452ae7456053727bd11b2fa49b4;hpb=a8b248ea9de55316cac4423a99a727ca7b54e0fc diff --git a/ccan/alloc/alloc.c b/ccan/alloc/alloc.c index d5accb40..64475078 100644 --- a/ccan/alloc/alloc.c +++ b/ccan/alloc/alloc.c @@ -5,1162 +5,1234 @@ #include #include #include "alloc.h" +#include "bitops.h" +#include "tiny.h" #include +#include #include +#include +#include #include "config.h" -/* FIXME: We assume getpagesize() doesnt change. Remapping file with - * different pagesize should still work. */ +/* + Inspired by (and parts taken from) Andrew Tridgell's alloc_mmap: + http://samba.org/~tridge/junkcode/alloc_mmap/ + + Copyright (C) Andrew Tridgell 2007 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* We divide the pool into this many large pages (nearest power of 2) */ +#define MAX_LARGE_PAGES (256UL) + +/* 32 small pages == 1 large page. */ +#define BITS_FROM_SMALL_TO_LARGE_PAGE 5 + +#define MAX_SMALL_PAGES (MAX_LARGE_PAGES << BITS_FROM_SMALL_TO_LARGE_PAGE) + +/* Smallest pool size for this scheme: 128-byte small pages. That's + * 9/13% overhead for 32/64 bit. */ +#define MIN_USEFUL_SIZE (MAX_SMALL_PAGES * 128) + +/* Every 4 buckets, we jump up a power of 2. ...8 10 12 14 16 20 24 28 32... */ +#define INTER_BUCKET_SPACE 4 + +#define SMALL_PAGES_PER_LARGE_PAGE (1 << BITS_FROM_SMALL_TO_LARGE_PAGE) + +/* FIXME: Figure this out properly. */ +#define MAX_SIZE (1 << 30) + +/* How few object to fit in a page before using a larger one? (8) */ +#define MAX_PAGE_OBJECT_ORDER 3 -/* FIXME: Doesn't handle non-page-aligned poolsize. */ +#define BITS_PER_LONG (sizeof(long) * CHAR_BIT) -/* FIXME: Reduce. */ -#define MIN_SIZE (getpagesize() * 2) +struct bucket_state { + u32 elements_per_page; + u16 page_list; + u16 full_list; +}; + +struct header { + /* Bitmap of which pages are large. */ + unsigned long pagesize[MAX_LARGE_PAGES / BITS_PER_LONG]; + + /* List of unused small/large pages. */ + u16 small_free_list; + u16 large_free_list; -/* What's the granularity of sub-page allocs? */ -#define BITMAP_GRANULARITY 4 + /* List of huge allocs. */ + unsigned long huge; + + /* This is less defined: we have two buckets for each power of 2 */ + struct bucket_state bs[1]; +}; + +struct huge_alloc { + unsigned long next, prev; + unsigned long off, len; +}; + +struct page_header { + u16 next, prev; + /* FIXME: We can just count all-0 and all-1 used[] elements. */ + unsigned elements_used : 25; + unsigned bucket : 7; + unsigned long used[1]; /* One bit per element. */ +}; -/* File layout: +/* + * Every 4 buckets, the size doubles. + * Between buckets, sizes increase linearly. * - * file := pagestates pad uniform-cache metadata - * pagestates := pages * 2-bits-per-page - * pad := pad to next ALIGNOF(metaheader) + * eg. bucket 40 = 2^10 = 1024 + * bucket 41 = 2^10 + 2^10*4 = 1024 + 256 + * bucket 42 = 2^10 + 2^10*4 = 1024 + 512 + * bucket 43 = 2^10 + 2^10*4 = 1024 + 768 + * bucket 45 = 2^11 = 2048 * - * metadata := metalen next-ptr metabits - * metabits := freeblock | bitblock | uniformblock - * freeblock := FREE + - * bitblock := BITMAP + 2-bits-per-bit-in-page + pad-to-byte - * uniformblock := UNIFORM + 14-bit-byte-len + bits + pad-to-byte + * Care is taken to handle low numbered buckets, at cost of overflow. */ -#define UNIFORM_CACHE_NUM 16 -struct uniform_cache +static unsigned long bucket_to_size(unsigned int bucket) { - uint16_t size[UNIFORM_CACHE_NUM]; - /* These could be u32 if we're prepared to limit size. */ - unsigned long page[UNIFORM_CACHE_NUM]; -}; + unsigned long base = 1UL << (bucket / INTER_BUCKET_SPACE); + return base + ((bucket % INTER_BUCKET_SPACE) + << (bucket / INTER_BUCKET_SPACE)) + / INTER_BUCKET_SPACE; +} -struct metaheader +/* + * Say size is 10. + * fls(size/2) == 3. 1 << 3 == 8, so we're 2 too large, out of a possible + * 8 too large. That's 1/4 of the way to the next power of 2 == 1 bucket. + * + * We make sure we round up. Note that this fails on 32 bit at size + * 1879048193 (around bucket 120). + */ +static unsigned int size_to_bucket(unsigned long size) { - /* Next meta header, or 0 */ - unsigned long next; - /* Bits start here. */ -}; + unsigned int base = fls(size/2); + unsigned long overshoot; -/* Assumes a is a power of two. */ -static unsigned long align_up(unsigned long x, unsigned long a) -{ - return (x + a - 1) & ~(a - 1); + overshoot = size - (1UL << base); + return base * INTER_BUCKET_SPACE + + ((overshoot * INTER_BUCKET_SPACE + (1UL << base)-1) >> base); } -static unsigned long align_down(unsigned long x, unsigned long a) +static unsigned int small_page_bits(unsigned long poolsize) { - return x & ~(a - 1); + return fls(poolsize / MAX_SMALL_PAGES - 1); } -static unsigned long div_up(unsigned long x, unsigned long a) +static struct page_header *from_pgnum(struct header *head, + unsigned long pgnum, + unsigned sp_bits) { - return (x + a - 1) / a; + return (struct page_header *)((char *)head + (pgnum << sp_bits)); } -/* It turns out that we spend a lot of time dealing with bit pairs. - * These routines manipulate them. - */ -static uint8_t get_bit_pair(const uint8_t *bits, unsigned long index) +static u16 to_pgnum(struct header *head, void *p, unsigned sp_bits) { - return bits[index * 2 / CHAR_BIT] >> (index * 2 % CHAR_BIT) & 3; + return ((char *)p - (char *)head) >> sp_bits; } -static void set_bit_pair(uint8_t *bits, unsigned long index, uint8_t val) +static size_t used_size(unsigned int num_elements) { - bits[index * 2 / CHAR_BIT] &= ~(3 << (index * 2 % CHAR_BIT)); - bits[index * 2 / CHAR_BIT] |= (val << (index * 2 % CHAR_BIT)); + return align_up(num_elements, BITS_PER_LONG) / CHAR_BIT; } -/* This is used for page states and subpage allocations */ -enum alloc_state +/* + * We always align the first entry to the lower power of 2. + * eg. the 12-byte bucket gets 8-byte aligned. The 4096-byte bucket + * gets 4096-byte aligned. + */ +static unsigned long page_header_size(unsigned int align_bits, + unsigned long num_elements) { - FREE, - TAKEN, - TAKEN_START, - SPECIAL, /* Sub-page allocation for page states. */ -}; + unsigned long size; -/* The types for subpage metadata. */ -enum sub_metadata_type -{ - /* FREE is same as alloc state */ - BITMAP = 1, /* bitmap allocated page */ - UNIFORM, /* uniform size allocated page */ -}; + size = sizeof(struct page_header) + - sizeof(((struct page_header *)0)->used) + + used_size(num_elements); + return align_up(size, 1UL << align_bits); +} -/* Page states are represented by bitpairs, at the start of the pool. */ -#define BITS_PER_PAGE 2 +static void add_to_list(struct header *head, + u16 *list, struct page_header *ph, unsigned sp_bits) +{ + unsigned long h = *list, offset = to_pgnum(head, ph, sp_bits); -/* How much metadata info per byte? */ -#define METADATA_PER_BYTE (CHAR_BIT / 2) + ph->next = h; + if (h) { + struct page_header *prev = from_pgnum(head, h, sp_bits); + assert(prev->prev == 0); + prev->prev = offset; + } + *list = offset; + ph->prev = 0; +} -static uint8_t *get_page_statebits(const void *pool) +static void del_from_list(struct header *head, + u16 *list, struct page_header *ph, unsigned sp_bits) { - return (uint8_t *)pool + sizeof(struct uniform_cache); + /* Front of list? */ + if (ph->prev == 0) { + *list = ph->next; + } else { + struct page_header *prev = from_pgnum(head, ph->prev, sp_bits); + prev->next = ph->next; + } + if (ph->next != 0) { + struct page_header *next = from_pgnum(head, ph->next, sp_bits); + next->prev = ph->prev; + } } -static enum alloc_state get_page_state(const void *pool, unsigned long page) +static u16 pop_from_list(struct header *head, + u16 *list, + unsigned int sp_bits) { - return get_bit_pair(get_page_statebits(pool), page); + u16 h = *list; + struct page_header *ph = from_pgnum(head, h, sp_bits); + + if (likely(h)) { + *list = ph->next; + if (*list) + from_pgnum(head, *list, sp_bits)->prev = 0; + } + return h; } -static void set_page_state(void *pool, unsigned long page, enum alloc_state s) +static void add_to_huge_list(struct header *head, struct huge_alloc *ha) { - set_bit_pair(get_page_statebits(pool), page, s); + unsigned long h = head->huge; + unsigned long offset = (char *)ha - (char *)head; + + ha->next = h; + if (h) { + struct huge_alloc *prev = (void *)((char *)head + h); + assert(prev->prev == 0); + prev->prev = offset; + } + head->huge = offset; + ha->prev = 0; } -/* The offset of metadata for a subpage allocation is found at the end - * of the subpage */ -#define SUBPAGE_METAOFF (getpagesize() - sizeof(unsigned long)) - -/* This is the length of metadata in bits. It consists of two bits - * for every BITMAP_GRANULARITY of usable bytes in the page, then two - * bits for the tailer.. */ -#define BITMAP_METABITLEN \ - ((div_up(SUBPAGE_METAOFF, BITMAP_GRANULARITY) + 1) * BITS_PER_PAGE) - -/* This is the length in bytes. */ -#define BITMAP_METALEN (div_up(BITMAP_METABITLEN, CHAR_BIT)) - -static struct metaheader *first_mheader(void *pool, unsigned long poolsize) +static void del_from_huge(struct header *head, struct huge_alloc *ha) { - unsigned int pagestatelen; - - pagestatelen = align_up(div_up(poolsize/getpagesize() * BITS_PER_PAGE, - CHAR_BIT), - ALIGNOF(struct metaheader)); - return (struct metaheader *)(get_page_statebits(pool) + pagestatelen); + /* Front of list? */ + if (ha->prev == 0) { + head->huge = ha->next; + } else { + struct huge_alloc *prev = (void *)((char *)head + ha->prev); + prev->next = ha->next; + } + if (ha->next != 0) { + struct huge_alloc *next = (void *)((char *)head + ha->next); + next->prev = ha->prev; + } } -static struct metaheader *next_mheader(void *pool, struct metaheader *mh) +static void add_small_page_to_freelist(struct header *head, + struct page_header *ph, + unsigned int sp_bits) { - if (!mh->next) - return NULL; - - return (struct metaheader *)((char *)pool + mh->next); + add_to_list(head, &head->small_free_list, ph, sp_bits); } -static unsigned long pool_offset(void *pool, void *p) +static void add_large_page_to_freelist(struct header *head, + struct page_header *ph, + unsigned int sp_bits) { - return (char *)p - (char *)pool; + add_to_list(head, &head->large_free_list, ph, sp_bits); } -void alloc_init(void *pool, unsigned long poolsize) +static void add_to_bucket_list(struct header *head, + struct bucket_state *bs, + struct page_header *ph, + unsigned int sp_bits) { - /* FIXME: Alignment assumptions about pool. */ - unsigned long len, i; - struct metaheader *mh; - - if (poolsize < MIN_SIZE) - return; - - mh = first_mheader(pool, poolsize); - - /* Mark all page states FREE, all uniform caches zero, and all of - * metaheader bitmap which takes rest of first page. */ - len = align_up(pool_offset(pool, mh + 1), getpagesize()); - BUILD_ASSERT(FREE == 0); - memset(pool, 0, len); - - /* Mark the pagestate and metadata page(s) allocated. */ - set_page_state(pool, 0, TAKEN_START); - for (i = 1; i < div_up(len, getpagesize()); i++) - set_page_state(pool, i, TAKEN); + add_to_list(head, &bs->page_list, ph, sp_bits); } -/* Two bits per element, representing page states. Returns 0 on fail. - * off is used to allocate from subpage bitmaps, which use the first 2 - * bits as the type, so the real bitmap is offset by 1. */ -static unsigned long alloc_from_bitmap(uint8_t *bits, unsigned long off, - unsigned long elems, - unsigned long want, unsigned long align) +static void del_from_bucket_list(struct header *head, + struct bucket_state *bs, + struct page_header *ph, + unsigned int sp_bits) { - long i; - unsigned long free; - - free = 0; - /* We allocate from far end, to increase ability to expand metadata. */ - for (i = elems - 1; i >= 0; i--) { - switch (get_bit_pair(bits, off+i)) { - case FREE: - if (++free >= want) { - unsigned long j; - - /* They might ask for large alignment. */ - if (align && i % align) - continue; - - set_bit_pair(bits, off+i, TAKEN_START); - for (j = i+1; j < i + want; j++) - set_bit_pair(bits, off+j, TAKEN); - return off+i; - } - break; - case SPECIAL: - case TAKEN_START: - case TAKEN: - free = 0; - break; - } - } - - return 0; + del_from_list(head, &bs->page_list, ph, sp_bits); } -static unsigned long alloc_get_pages(void *pool, unsigned long poolsize, - unsigned long pages, unsigned long align) +static void del_from_bucket_full_list(struct header *head, + struct bucket_state *bs, + struct page_header *ph, + unsigned int sp_bits) { - return alloc_from_bitmap(get_page_statebits(pool), - 0, poolsize / getpagesize(), pages, - align / getpagesize()); + del_from_list(head, &bs->full_list, ph, sp_bits); } -/* Offset to metadata is at end of page. */ -static unsigned long *metadata_off(void *pool, unsigned long page) +static void add_to_bucket_full_list(struct header *head, + struct bucket_state *bs, + struct page_header *ph, + unsigned int sp_bits) { - return (unsigned long *) - ((char *)pool + (page+1)*getpagesize() - sizeof(unsigned long)); + add_to_list(head, &bs->full_list, ph, sp_bits); } -static uint8_t *get_page_metadata(void *pool, unsigned long page) +static void clear_bit(unsigned long bitmap[], unsigned int off) { - return (uint8_t *)pool + *metadata_off(pool, page); + bitmap[off / BITS_PER_LONG] &= ~(1UL << (off % BITS_PER_LONG)); } -static void set_page_metadata(void *pool, unsigned long page, uint8_t *meta) +static bool test_bit(const unsigned long bitmap[], unsigned int off) { - *metadata_off(pool, page) = meta - (uint8_t *)pool; + return bitmap[off / BITS_PER_LONG] & (1UL << (off % BITS_PER_LONG)); } -static unsigned long sub_page_alloc(void *pool, unsigned long page, - unsigned long size, unsigned long align) +static void set_bit(unsigned long bitmap[], unsigned int off) { - uint8_t *bits = get_page_metadata(pool, page); - unsigned long i; - enum sub_metadata_type type; - - type = get_bit_pair(bits, 0); - - /* If this is a uniform page, we can't allocate from it. */ - if (type == UNIFORM) - return 0; - - assert(type == BITMAP); - - /* We use a standart bitmap, but offset because of that BITMAP - * header. */ - i = alloc_from_bitmap(bits, 1, SUBPAGE_METAOFF/BITMAP_GRANULARITY, - div_up(size, BITMAP_GRANULARITY), - align / BITMAP_GRANULARITY); + bitmap[off / BITS_PER_LONG] |= (1UL << (off % BITS_PER_LONG)); +} - /* Can't allocate? */ - if (i == 0) - return 0; +/* There must be a bit to be found. */ +static unsigned int find_free_bit(const unsigned long bitmap[]) +{ + unsigned int i; - /* i-1 because of the header. */ - return page*getpagesize() + (i-1)*BITMAP_GRANULARITY; + for (i = 0; bitmap[i] == -1UL; i++); + return (i*BITS_PER_LONG) + ffsl(~bitmap[i]) - 1; } -/* We look at the page states to figure out where the allocation for this - * metadata ends. */ -static unsigned long get_metalen(void *pool, unsigned long poolsize, - struct metaheader *mh) +/* How many elements can we fit in a page? */ +static unsigned long elements_per_page(unsigned long align_bits, + unsigned long esize, + unsigned long psize) { - unsigned long i, first, pages = poolsize / getpagesize(); - - first = pool_offset(pool, mh + 1)/getpagesize(); + unsigned long num, overhead; - for (i = first + 1; i < pages && get_page_state(pool,i) == TAKEN; i++); + /* First approximation: no extra room for bitmap. */ + overhead = align_up(sizeof(struct page_header), 1UL << align_bits); + num = (psize - overhead) / esize; - return i * getpagesize() - pool_offset(pool, mh + 1); + while (page_header_size(align_bits, num) + esize * num > psize) + num--; + return num; } -static unsigned int uniform_metalen(unsigned int usize) +static bool large_page_bucket(unsigned int bucket, unsigned int sp_bits) { - unsigned int metalen; - - assert(usize < (1 << 14)); + unsigned long max_smallsize; - /* Two bits for the header, 14 bits for size, then one bit for each - * element the page can hold. Round up to number of bytes. */ - metalen = div_up(2 + 14 + SUBPAGE_METAOFF / usize, CHAR_BIT); + /* Note: this doesn't take into account page header. */ + max_smallsize = (1UL << sp_bits) >> MAX_PAGE_OBJECT_ORDER; - /* To ensure metaheader is always aligned, round bytes up. */ - metalen = align_up(metalen, ALIGNOF(struct metaheader)); - - return metalen; + return bucket_to_size(bucket) > max_smallsize; } -static unsigned int decode_usize(uint8_t *meta) +static unsigned int max_bucket(unsigned int lp_bits) { - return ((unsigned)meta[1] << (CHAR_BIT-2)) | (meta[0] >> 2); + return (lp_bits - MAX_PAGE_OBJECT_ORDER) * INTER_BUCKET_SPACE; } -static void encode_usize(uint8_t *meta, unsigned int usize) +void alloc_init(void *pool, unsigned long poolsize) { - meta[0] = (UNIFORM | (usize << 2)); - meta[1] = (usize >> (CHAR_BIT - 2)); -} + struct header *head = pool; + struct page_header *ph; + unsigned int lp_bits, sp_bits, num_buckets; + unsigned long header_size, i; -static uint8_t *alloc_metaspace(void *pool, unsigned long poolsize, - struct metaheader *mh, unsigned long bytes, - enum sub_metadata_type type) -{ - uint8_t *meta = (uint8_t *)(mh + 1); - unsigned long free = 0, len, i, metalen; - - metalen = get_metalen(pool, poolsize, mh); - - /* Walk through metadata looking for free. */ - for (i = 0; i < metalen * METADATA_PER_BYTE; i += len) { - switch (get_bit_pair(meta, i)) { - case FREE: - len = 1; - free++; - if (free == bytes * METADATA_PER_BYTE) { - /* Mark this as a bitmap. */ - set_bit_pair(meta, i - free + 1, type); - return meta + (i - free + 1)/METADATA_PER_BYTE; - } - break; - case BITMAP: - /* Skip over this allocated part. */ - len = BITMAP_METALEN * METADATA_PER_BYTE; - free = 0; - break; - case UNIFORM: - /* Figure metalen given usize. */ - len = decode_usize(meta + i / METADATA_PER_BYTE); - len = uniform_metalen(len) * METADATA_PER_BYTE; - free = 0; - break; - default: - assert(0); - return NULL; - } + if (poolsize < MIN_USEFUL_SIZE) { + tiny_alloc_init(pool, poolsize); + return; } - return NULL; -} -/* We need this many bytes of metadata. */ -static uint8_t *new_metadata(void *pool, unsigned long poolsize, - unsigned long bytes, enum sub_metadata_type type) -{ - struct metaheader *mh, *newmh; - unsigned long page; - uint8_t *meta; + /* We rely on page numbers fitting in 16 bit. */ + BUILD_ASSERT(MAX_SMALL_PAGES < 65536); + + sp_bits = small_page_bits(poolsize); + lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE; - for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)) - if ((meta = alloc_metaspace(pool, poolsize, mh, bytes, type))) - return meta; + num_buckets = max_bucket(lp_bits); - /* No room for metadata? Can we expand an existing one? */ - for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){ - unsigned long nextpage; + head = pool; + header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1); - /* We start on this page. */ - nextpage = pool_offset(pool, (char *)(mh+1))/getpagesize(); - /* Iterate through any other pages we own. */ - while (get_page_state(pool, ++nextpage) == TAKEN); + memset(head, 0, header_size); + for (i = 0; i < num_buckets; i++) { + unsigned long pagesize; - /* Now, can we grab that page? */ - if (get_page_state(pool, nextpage) != FREE) - continue; + if (large_page_bucket(i, sp_bits)) + pagesize = 1UL << lp_bits; + else + pagesize = 1UL << sp_bits; - /* OK, expand metadata, do it again. */ - set_page_state(pool, nextpage, TAKEN); - BUILD_ASSERT(FREE == 0); - memset((char *)pool + nextpage*getpagesize(), 0, getpagesize()); - return alloc_metaspace(pool, poolsize, mh, bytes, type); + head->bs[i].elements_per_page + = elements_per_page(i / INTER_BUCKET_SPACE, + bucket_to_size(i), + pagesize); } - /* No metadata left at all? */ - page = alloc_get_pages(pool, poolsize, div_up(bytes, getpagesize()), 1); - if (!page) - return NULL; + /* They start as all large pages. */ + memset(head->pagesize, 0xFF, sizeof(head->pagesize)); + /* FIXME: small pages for last bit? */ + + /* Split first page into small pages. */ + assert(header_size < (1UL << lp_bits)); + clear_bit(head->pagesize, 0); + + /* Skip over page(s) used by header, add rest to free list */ + for (i = align_up(header_size, (1UL << sp_bits)) >> sp_bits; + i < SMALL_PAGES_PER_LARGE_PAGE; + i++) { + ph = from_pgnum(head, i, sp_bits); + ph->elements_used = 0; + add_small_page_to_freelist(head, ph, sp_bits); + } - newmh = (struct metaheader *)((char *)pool + page * getpagesize()); - BUILD_ASSERT(FREE == 0); - memset(newmh + 1, 0, getpagesize() - sizeof(*mh)); + /* Add the rest of the pages as large pages. */ + i = SMALL_PAGES_PER_LARGE_PAGE; + while ((i << sp_bits) + (1UL << lp_bits) <= poolsize) { + assert(i < MAX_SMALL_PAGES); + ph = from_pgnum(head, i, sp_bits); + ph->elements_used = 0; + add_large_page_to_freelist(head, ph, sp_bits); + i += SMALL_PAGES_PER_LARGE_PAGE; + } +} - /* Sew it into linked list */ - mh = first_mheader(pool,poolsize); - newmh->next = mh->next; - mh->next = pool_offset(pool, newmh); +/* A large page worth of small pages are free: delete them from free list. */ +static void del_large_from_small_free_list(struct header *head, + struct page_header *ph, + unsigned int sp_bits) +{ + unsigned long i; - return alloc_metaspace(pool, poolsize, newmh, bytes, type); + for (i = 0; i < SMALL_PAGES_PER_LARGE_PAGE; i++) { + del_from_list(head, &head->small_free_list, + (void *)ph + (i << sp_bits), + sp_bits); + } } -static void alloc_free_pages(void *pool, unsigned long pagenum) +static bool all_empty(struct header *head, + unsigned long pgnum, + unsigned sp_bits) { - assert(get_page_state(pool, pagenum) == TAKEN_START); - set_page_state(pool, pagenum, FREE); - while (get_page_state(pool, ++pagenum) == TAKEN) - set_page_state(pool, pagenum, FREE); + unsigned long i; + + for (i = 0; i < SMALL_PAGES_PER_LARGE_PAGE; i++) { + struct page_header *ph = from_pgnum(head, pgnum + i, sp_bits); + if (ph->elements_used) + return false; + } + return true; } -static void maybe_transform_uniform_page(void *pool, unsigned long offset) +static void recombine_small_pages(struct header *head, unsigned long poolsize, + unsigned int sp_bits) { - /* FIXME: If possible and page isn't full, change to a bitmap */ + unsigned long i; + unsigned int lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE; + + /* Look for small pages to coalesce, after first large page. */ + for (i = SMALL_PAGES_PER_LARGE_PAGE; + i < (poolsize >> lp_bits) << BITS_FROM_SMALL_TO_LARGE_PAGE; + i += SMALL_PAGES_PER_LARGE_PAGE) { + /* Already a large page? */ + if (test_bit(head->pagesize, i / SMALL_PAGES_PER_LARGE_PAGE)) + continue; + if (all_empty(head, i, sp_bits)) { + struct page_header *ph = from_pgnum(head, i, sp_bits); + set_bit(head->pagesize, + i / SMALL_PAGES_PER_LARGE_PAGE); + del_large_from_small_free_list(head, ph, sp_bits); + add_large_page_to_freelist(head, ph, sp_bits); + } + } } -/* Returns 0 or the size of the uniform alloc to use */ -static unsigned long suitable_for_uc(unsigned long size, unsigned long align) +static u16 get_large_page(struct header *head, unsigned long poolsize, + unsigned int sp_bits) { - unsigned long num_elems, wastage, usize; - unsigned long bitmap_cost; + unsigned int lp_bits, page; - if (size == 0) - size = 1; + lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE; - /* Fix up silly alignments. */ - usize = align_up(size, align); + page = pop_from_list(head, &head->large_free_list, sp_bits); + if (likely(page)) + return page; - /* How many can fit in this page? */ - num_elems = SUBPAGE_METAOFF / usize; + recombine_small_pages(head, poolsize, sp_bits); - /* Can happen with bigger alignments. */ - if (!num_elems) - return 0; - - /* Usize maxes out at 14 bits. */ - if (usize >= (1 << 14)) - return 0; - - /* How many bytes would be left at the end? */ - wastage = SUBPAGE_METAOFF % usize; + return pop_from_list(head, &head->large_free_list, sp_bits); +} - /* If we can get a larger allocation within alignment constraints, we - * should do it, otherwise might as well leave wastage at the end. */ - usize += align_down(wastage / num_elems, align); +/* Returns small page. */ +static unsigned long break_up_large_page(struct header *head, + unsigned int sp_bits, + u16 lpage) +{ + unsigned int i; - /* Bitmap allocation costs 2 bits per BITMAP_GRANULARITY bytes, plus - * however much we waste in rounding up to BITMAP_GRANULARITY. */ - bitmap_cost = 2 * div_up(size, BITMAP_GRANULARITY) - + CHAR_BIT * (align_up(size, BITMAP_GRANULARITY) - size); + clear_bit(head->pagesize, lpage >> BITS_FROM_SMALL_TO_LARGE_PAGE); - /* Our cost is 1 bit, plus usize overhead */ - if (bitmap_cost < 1 + (usize - size) * CHAR_BIT) - return 0; + for (i = 1; i < SMALL_PAGES_PER_LARGE_PAGE; i++) { + struct page_header *ph = from_pgnum(head, lpage + i, sp_bits); + /* Initialize this: huge_alloc reads it. */ + ph->elements_used = 0; + add_small_page_to_freelist(head, ph, sp_bits); + } - return usize; + return lpage; } -static unsigned long uniform_alloc(void *pool, unsigned long poolsize, - struct uniform_cache *uc, - unsigned long ucnum) +static u16 get_small_page(struct header *head, unsigned long poolsize, + unsigned int sp_bits) { - uint8_t *metadata = get_page_metadata(pool, uc->page[ucnum]) + 2; - unsigned long i, max; - - /* Simple one-bit-per-object bitmap. */ - max = SUBPAGE_METAOFF / uc->size[ucnum]; - for (i = 0; i < max; i++) { - if (!(metadata[i / CHAR_BIT] & (1 << (i % CHAR_BIT)))) { - metadata[i / CHAR_BIT] |= (1 << (i % CHAR_BIT)); - return uc->page[ucnum] * getpagesize() - + i * uc->size[ucnum]; - } - } - - return 0; + u16 ret; + + ret = pop_from_list(head, &head->small_free_list, sp_bits); + if (likely(ret)) + return ret; + ret = get_large_page(head, poolsize, sp_bits); + if (likely(ret)) + ret = break_up_large_page(head, sp_bits, ret); + return ret; } -static unsigned long new_uniform_page(void *pool, unsigned long poolsize, - unsigned long usize) +static bool huge_allocated(struct header *head, unsigned long offset) { - unsigned long page, metalen; - uint8_t *metadata; - - page = alloc_get_pages(pool, poolsize, 1, 1); - if (page == 0) - return 0; - - metalen = uniform_metalen(usize); + unsigned long i; + struct huge_alloc *ha; - /* Get metadata for page. */ - metadata = new_metadata(pool, poolsize, metalen, UNIFORM); - if (!metadata) { - alloc_free_pages(pool, page); - return 0; + for (i = head->huge; i; i = ha->next) { + ha = (void *)((char *)head + i); + if (ha->off <= offset && ha->off + ha->len > offset) + return true; } + return false; +} - encode_usize(metadata, usize); +/* They want something really big. Aim for contiguous pages (slow). */ +static COLD void *huge_alloc(void *pool, unsigned long poolsize, + unsigned long size, unsigned long align) +{ + struct header *head = pool; + struct huge_alloc *ha; + unsigned long i, sp_bits, lp_bits, num, header_size; - BUILD_ASSERT(FREE == 0); - memset(metadata + 2, 0, metalen - 2); + sp_bits = small_page_bits(poolsize); + lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE; - /* Actually, this is a subpage page now. */ - set_page_state(pool, page, SPECIAL); + /* Allocate tracking structure optimistically. */ + ha = alloc_get(pool, poolsize, sizeof(*ha), ALIGNOF(*ha)); + if (!ha) + return NULL; - /* Set metadata pointer for page. */ - set_page_metadata(pool, page, metadata); + /* First search for contiguous small pages... */ + header_size = sizeof(*head) + sizeof(head->bs) * (max_bucket(lp_bits)-1); - return page; -} + num = 0; + for (i = (header_size + (1UL << sp_bits) - 1) >> sp_bits; + i << sp_bits < poolsize; + i++) { + struct page_header *pg; + unsigned long off = (i << sp_bits); -static unsigned long alloc_sub_page(void *pool, unsigned long poolsize, - unsigned long size, unsigned long align) -{ - unsigned long i, usize; - uint8_t *metadata; - struct uniform_cache *uc = pool; - - usize = suitable_for_uc(size, align); - if (usize) { - /* Look for a uniform page. */ - for (i = 0; i < UNIFORM_CACHE_NUM; i++) { - if (uc->size[i] == usize) { - unsigned long ret; - ret = uniform_alloc(pool, poolsize, uc, i); - if (ret != 0) - return ret; - /* OK, that one is full, remove from cache. */ - uc->size[i] = 0; - break; - } + /* Skip over large pages. */ + if (test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE)) { + i += (1UL << BITS_FROM_SMALL_TO_LARGE_PAGE)-1; + continue; } - /* OK, try a new uniform page. Use random discard for now. */ - i = random() % UNIFORM_CACHE_NUM; - maybe_transform_uniform_page(pool, uc->page[i]); + /* Does this page meet alignment requirements? */ + if (!num && off % align != 0) + continue; - uc->page[i] = new_uniform_page(pool, poolsize, usize); - if (uc->page[i]) { - uc->size[i] = usize; - return uniform_alloc(pool, poolsize, uc, i); + /* FIXME: This makes us O(n^2). */ + if (huge_allocated(head, off)) { + num = 0; + continue; } - uc->size[i] = 0; - } - /* Look for partial page. */ - for (i = 0; i < poolsize / getpagesize(); i++) { - unsigned long ret; - if (get_page_state(pool, i) != SPECIAL) + pg = (struct page_header *)((char *)head + off); + if (pg->elements_used) { + num = 0; continue; + } - ret = sub_page_alloc(pool, i, size, align); - if (ret) - return ret; - } - - /* Create new SUBPAGE page. */ - i = alloc_get_pages(pool, poolsize, 1, 1); - if (i == 0) - return 0; + num++; + if (num << sp_bits >= size) { + unsigned long pgnum; - /* Get metadata for page. */ - metadata = new_metadata(pool, poolsize, BITMAP_METALEN, BITMAP); - if (!metadata) { - alloc_free_pages(pool, i); - return 0; + /* Remove from free list. */ + for (pgnum = i; pgnum > i - num; pgnum--) { + pg = from_pgnum(head, pgnum, sp_bits); + del_from_list(head, + &head->small_free_list, + pg, sp_bits); + } + ha->off = (i - num + 1) << sp_bits; + ha->len = num << sp_bits; + goto done; + } } - /* Actually, this is a subpage page now. */ - set_page_state(pool, i, SPECIAL); + /* Now search for large pages... */ + recombine_small_pages(head, poolsize, sp_bits); - /* Set metadata pointer for page. */ - set_page_metadata(pool, i, metadata); + num = 0; + for (i = (header_size + (1UL << lp_bits) - 1) >> lp_bits; + (i << lp_bits) < poolsize; i++) { + struct page_header *pg; + unsigned long off = (i << lp_bits); - /* Do allocation like normal */ - return sub_page_alloc(pool, i, size, align); -} - -static bool bitmap_page_is_empty(uint8_t *meta) -{ - unsigned int i; + /* Ignore small pages. */ + if (!test_bit(head->pagesize, i)) + continue; - /* Skip the header (first bit of metadata). */ - for (i = 1; i < SUBPAGE_METAOFF/BITMAP_GRANULARITY+1; i++) - if (get_bit_pair(meta, i) != FREE) - return false; + /* Does this page meet alignment requirements? */ + if (!num && off % align != 0) + continue; - return true; -} + /* FIXME: This makes us O(n^2). */ + if (huge_allocated(head, off)) { + num = 0; + continue; + } -static bool uniform_page_is_empty(uint8_t *meta) -{ - unsigned int i, metalen; + pg = (struct page_header *)((char *)head + off); + if (pg->elements_used) { + num = 0; + continue; + } - metalen = uniform_metalen(decode_usize(meta)); + num++; + if (num << lp_bits >= size) { + unsigned long pgnum; - /* Skip the header (first two bytes of metadata). */ - for (i = 2; i < metalen + 2; i++) { - BUILD_ASSERT(FREE == 0); - if (meta[i]) - return false; + /* Remove from free list. */ + for (pgnum = i; pgnum > i - num; pgnum--) { + pg = from_pgnum(head, pgnum, lp_bits); + del_from_list(head, + &head->large_free_list, + pg, sp_bits); + } + ha->off = (i - num + 1) << lp_bits; + ha->len = num << lp_bits; + goto done; + } } - return true; -} -static bool special_page_is_empty(void *pool, unsigned long page) -{ - uint8_t *meta; - enum sub_metadata_type type; - - meta = get_page_metadata(pool, page); - type = get_bit_pair(meta, 0); - - switch (type) { - case UNIFORM: - return uniform_page_is_empty(meta); - case BITMAP: - return bitmap_page_is_empty(meta); - default: - assert(0); - } -} + /* Unable to satisfy: free huge alloc structure. */ + alloc_free(pool, poolsize, ha); + return NULL; -static void clear_special_metadata(void *pool, unsigned long page) -{ - uint8_t *meta; - enum sub_metadata_type type; - - meta = get_page_metadata(pool, page); - type = get_bit_pair(meta, 0); - - switch (type) { - case UNIFORM: - /* First two bytes are the header, rest is already FREE */ - BUILD_ASSERT(FREE == 0); - memset(meta, 0, 2); - break; - case BITMAP: - /* First two bits is the header. */ - BUILD_ASSERT(BITMAP_METALEN > 1); - meta[0] = 0; - break; - default: - assert(0); - } +done: + add_to_huge_list(pool, ha); + return (char *)pool + ha->off; } -/* Returns true if we cleaned any pages. */ -static bool clean_empty_subpages(void *pool, unsigned long poolsize) +static COLD void +huge_free(struct header *head, unsigned long poolsize, void *free) { - unsigned long i; - bool progress = false; - - for (i = 0; i < poolsize/getpagesize(); i++) { - if (get_page_state(pool, i) != SPECIAL) - continue; + unsigned long i, off, pgnum, free_off = (char *)free - (char *)head; + unsigned int sp_bits, lp_bits; + struct huge_alloc *ha; - if (special_page_is_empty(pool, i)) { - clear_special_metadata(pool, i); - set_page_state(pool, i, FREE); - progress = true; + for (i = head->huge; i; i = ha->next) { + ha = (void *)((char *)head + i); + if (free_off == ha->off) + break; + } + assert(i); + + /* Free up all the pages, delete and free ha */ + sp_bits = small_page_bits(poolsize); + lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE; + pgnum = free_off >> sp_bits; + + if (test_bit(head->pagesize, pgnum >> BITS_FROM_SMALL_TO_LARGE_PAGE)) { + for (off = ha->off; + off < ha->off + ha->len; + off += 1UL << lp_bits) { + add_large_page_to_freelist(head, + (void *)((char *)head + off), + sp_bits); + } + } else { + for (off = ha->off; + off < ha->off + ha->len; + off += 1UL << sp_bits) { + add_small_page_to_freelist(head, + (void *)((char *)head + off), + sp_bits); } } - return progress; + del_from_huge(head, ha); + alloc_free(head, poolsize, ha); } -/* Returns true if we cleaned any pages. */ -static bool clean_metadata(void *pool, unsigned long poolsize) +static COLD unsigned long huge_size(struct header *head, void *p) { - struct metaheader *mh, *prev_mh = NULL; - bool progress = false; - - for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){ - uint8_t *meta; - long i; - unsigned long metalen = get_metalen(pool, poolsize, mh); - - meta = (uint8_t *)(mh + 1); - BUILD_ASSERT(FREE == 0); - for (i = metalen - 1; i > 0; i--) - if (meta[i] != 0) - break; - - /* Completely empty? */ - if (prev_mh && i == metalen) { - alloc_free_pages(pool, - pool_offset(pool, mh)/getpagesize()); - prev_mh->next = mh->next; - mh = prev_mh; - progress = true; - } else { - uint8_t *p; - - /* Some pages at end are free? */ - for (p = (uint8_t *)(mh+1) + metalen - getpagesize(); - p > meta + i; - p -= getpagesize()) { - set_page_state(pool, - pool_offset(pool, p) - / getpagesize(), - FREE); - progress = true; - } + unsigned long i, off = (char *)p - (char *)head; + struct huge_alloc *ha; + + for (i = head->huge; i; i = ha->next) { + ha = (void *)((char *)head + i); + if (off == ha->off) { + return ha->len; } } - - return progress; + abort(); } void *alloc_get(void *pool, unsigned long poolsize, unsigned long size, unsigned long align) { - bool subpage_clean = false, metadata_clean = false; - unsigned long ret; - - if (poolsize < MIN_SIZE) - return NULL; - -again: - /* Sub-page allocations have an overhead of ~12%. */ - if (size + size/8 >= getpagesize() || align >= getpagesize()) { - unsigned long pages = div_up(size, getpagesize()); - - ret = alloc_get_pages(pool, poolsize, pages, align) - * getpagesize(); - } else - ret = alloc_sub_page(pool, poolsize, size, align); - - if (ret != 0) - return (char *)pool + ret; - - /* Allocation failed: garbage collection. */ - if (!subpage_clean) { - subpage_clean = true; - if (clean_empty_subpages(pool, poolsize)) - goto again; - } + struct header *head = pool; + unsigned int bucket; + unsigned long i; + struct bucket_state *bs; + struct page_header *ph; + unsigned int sp_bits; - if (!metadata_clean) { - metadata_clean = true; - if (clean_metadata(pool, poolsize)) - goto again; + if (poolsize < MIN_USEFUL_SIZE) { + return tiny_alloc_get(pool, poolsize, size, align); } - /* FIXME: Compact metadata? */ - return NULL; -} - -static void bitmap_free(void *pool, unsigned long pagenum, unsigned long off, - uint8_t *metadata) -{ - assert(off % BITMAP_GRANULARITY == 0); + size = align_up(size, align); + if (unlikely(!size)) + size = 1; + bucket = size_to_bucket(size); - off /= BITMAP_GRANULARITY; + sp_bits = small_page_bits(poolsize); - /* Offset by one because first bit is used for header. */ - off++; + if (bucket >= max_bucket(sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE)) { + return huge_alloc(pool, poolsize, size, align); + } - set_bit_pair(metadata, off++, FREE); - while (off < SUBPAGE_METAOFF / BITMAP_GRANULARITY - && get_bit_pair(metadata, off) == TAKEN) - set_bit_pair(metadata, off++, FREE); -} + bs = &head->bs[bucket]; -static void uniform_free(void *pool, unsigned long pagenum, unsigned long off, - uint8_t *metadata) -{ - unsigned int usize, bit; + if (!bs->page_list) { + struct page_header *ph; - usize = decode_usize(metadata); - /* Must have been this size. */ - assert(off % usize == 0); - bit = off / usize; + if (large_page_bucket(bucket, sp_bits)) + bs->page_list = get_large_page(head, poolsize, + sp_bits); + else + bs->page_list = get_small_page(head, poolsize, + sp_bits); + /* FIXME: Try large-aligned alloc? Header stuffing? */ + if (unlikely(!bs->page_list)) + return NULL; + ph = from_pgnum(head, bs->page_list, sp_bits); + ph->bucket = bucket; + ph->elements_used = 0; + ph->next = 0; + memset(ph->used, 0, used_size(bs->elements_per_page)); + } - /* Skip header. */ - metadata += 2; + ph = from_pgnum(head, bs->page_list, sp_bits); - /* Must have been allocated. */ - assert(metadata[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT))); - metadata[bit / CHAR_BIT] &= ~(1 << (bit % CHAR_BIT)); -} + i = find_free_bit(ph->used); + set_bit(ph->used, i); + ph->elements_used++; -static void subpage_free(void *pool, unsigned long pagenum, void *free) -{ - unsigned long off = (unsigned long)free % getpagesize(); - uint8_t *metadata = get_page_metadata(pool, pagenum); - enum sub_metadata_type type; - - type = get_bit_pair(metadata, 0); - - assert(off < SUBPAGE_METAOFF); - - switch (type) { - case BITMAP: - bitmap_free(pool, pagenum, off, metadata); - break; - case UNIFORM: - uniform_free(pool, pagenum, off, metadata); - break; - default: - assert(0); + /* check if this page is now full */ + if (unlikely(ph->elements_used == bs->elements_per_page)) { + del_from_bucket_list(head, bs, ph, sp_bits); + add_to_bucket_full_list(head, bs, ph, sp_bits); } + + return (char *)ph + page_header_size(ph->bucket / INTER_BUCKET_SPACE, + bs->elements_per_page) + + i * bucket_to_size(bucket); } void alloc_free(void *pool, unsigned long poolsize, void *free) { - unsigned long pagenum; - struct metaheader *mh; + struct header *head = pool; + struct bucket_state *bs; + unsigned int sp_bits; + unsigned long i, pgnum, pgoffset, offset = (char *)free - (char *)pool; + bool smallpage; + struct page_header *ph; + + if (poolsize < MIN_USEFUL_SIZE) { + return tiny_alloc_free(pool, poolsize, free); + } + + /* Get page header. */ + sp_bits = small_page_bits(poolsize); + pgnum = offset >> sp_bits; + + /* Big page? Round down further. */ + if (test_bit(head->pagesize, pgnum >> BITS_FROM_SMALL_TO_LARGE_PAGE)) { + smallpage = false; + pgnum &= ~(SMALL_PAGES_PER_LARGE_PAGE - 1); + } else + smallpage = true; - if (!free) + /* Step back to page header. */ + ph = from_pgnum(head, pgnum, sp_bits); + if ((void *)ph == free) { + huge_free(head, poolsize, free); return; + } - assert(poolsize >= MIN_SIZE); - - mh = first_mheader(pool, poolsize); - assert((char *)free >= (char *)(mh + 1)); - assert((char *)pool + poolsize > (char *)free); + bs = &head->bs[ph->bucket]; + pgoffset = offset - (pgnum << sp_bits) + - page_header_size(ph->bucket / INTER_BUCKET_SPACE, + bs->elements_per_page); - pagenum = pool_offset(pool, free) / getpagesize(); + if (unlikely(ph->elements_used == bs->elements_per_page)) { + del_from_bucket_full_list(head, bs, ph, sp_bits); + add_to_bucket_list(head, bs, ph, sp_bits); + } - if (get_page_state(pool, pagenum) == SPECIAL) - subpage_free(pool, pagenum, free); - else { - assert((unsigned long)free % getpagesize() == 0); - alloc_free_pages(pool, pagenum); + /* Which element are we? */ + i = pgoffset / bucket_to_size(ph->bucket); + clear_bit(ph->used, i); + ph->elements_used--; + + if (unlikely(ph->elements_used == 0)) { + bs = &head->bs[ph->bucket]; + del_from_bucket_list(head, bs, ph, sp_bits); + if (smallpage) + add_small_page_to_freelist(head, ph, sp_bits); + else + add_large_page_to_freelist(head, ph, sp_bits); } } unsigned long alloc_size(void *pool, unsigned long poolsize, void *p) { - unsigned long len, pagenum; - struct metaheader *mh; - - assert(poolsize >= MIN_SIZE); + struct header *head = pool; + unsigned int pgnum, sp_bits; + unsigned long offset = (char *)p - (char *)pool; + struct page_header *ph; - mh = first_mheader(pool, poolsize); - assert((char *)p >= (char *)(mh + 1)); - assert((char *)pool + poolsize > (char *)p); + if (poolsize < MIN_USEFUL_SIZE) + return tiny_alloc_size(pool, poolsize, p); - pagenum = pool_offset(pool, p) / getpagesize(); + /* Get page header. */ + sp_bits = small_page_bits(poolsize); + pgnum = offset >> sp_bits; - if (get_page_state(pool, pagenum) == SPECIAL) { - unsigned long off = (unsigned long)p % getpagesize(); - uint8_t *metadata = get_page_metadata(pool, pagenum); - enum sub_metadata_type type = get_bit_pair(metadata, 0); + /* Big page? Round down further. */ + if (test_bit(head->pagesize, pgnum >> BITS_FROM_SMALL_TO_LARGE_PAGE)) + pgnum &= ~(SMALL_PAGES_PER_LARGE_PAGE - 1); - assert(off < SUBPAGE_METAOFF); - - switch (type) { - case BITMAP: - assert(off % BITMAP_GRANULARITY == 0); - off /= BITMAP_GRANULARITY; - - /* Offset by one because first bit used for header. */ - off++; - len = BITMAP_GRANULARITY; - while (++off < SUBPAGE_METAOFF / BITMAP_GRANULARITY - && get_bit_pair(metadata, off) == TAKEN) - len += BITMAP_GRANULARITY; - break; - case UNIFORM: - len = decode_usize(metadata); - break; - default: - assert(0); - } - } else { - len = getpagesize(); - while (get_page_state(pool, ++pagenum) == TAKEN) - len += getpagesize(); - } + /* Step back to page header. */ + ph = from_pgnum(head, pgnum, sp_bits); + if ((void *)ph == p) + return huge_size(head, p); - return len; + return bucket_to_size(ph->bucket); } -static bool is_metadata_page(void *pool, unsigned long poolsize, - unsigned long page) +/* Useful for gdb breakpoints. */ +static bool check_fail(void) { - struct metaheader *mh; - - for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){ - unsigned long start, end; - - start = pool_offset(pool, mh); - end = pool_offset(pool, (char *)(mh+1) - + get_metalen(pool, poolsize, mh)); - if (page >= start/getpagesize() && page < end/getpagesize()) - return true; - } return false; } -static bool check_bitmap_metadata(void *pool, unsigned long *mhoff) +static unsigned long count_bits(const unsigned long bitmap[], + unsigned long limit) { - enum alloc_state last_state = FREE; - unsigned int i; - - for (i = 0; i < SUBPAGE_METAOFF / BITMAP_GRANULARITY; i++) { - enum alloc_state state; + unsigned long i, count = 0; - /* +1 because header is the first byte. */ - state = get_bit_pair((uint8_t *)pool + *mhoff, i+1); - switch (state) { - case SPECIAL: - return false; - case TAKEN: - if (last_state == FREE) - return false; - break; - default: - break; - } - last_state = state; + while (limit >= BITS_PER_LONG) { + count += popcount(bitmap[0]); + bitmap++; + limit -= BITS_PER_LONG; } - return true; + + for (i = 0; i < limit; i++) + if (test_bit(bitmap, i)) + count++; + return count; } -static bool check_uniform_metadata(void *pool, unsigned long *mhoff) +static bool out_of_bounds(unsigned long pgnum, + unsigned int sp_bits, + unsigned long pagesize, + unsigned long poolsize) { - uint8_t *meta = (uint8_t *)pool + *mhoff; - unsigned int i, usize; - struct uniform_cache *uc = pool; - - usize = decode_usize(meta); - if (usize == 0 || suitable_for_uc(usize, 1) != usize) - return false; - - /* If it's in uniform cache, make sure that agrees on size. */ - for (i = 0; i < UNIFORM_CACHE_NUM; i++) { - uint8_t *ucm; - - if (!uc->size[i]) - continue; + if (((pgnum << sp_bits) >> sp_bits) != pgnum) + return true; - ucm = get_page_metadata(pool, uc->page[i]); - if (ucm != meta) - continue; + if ((pgnum << sp_bits) > poolsize) + return true; - if (usize != uc->size[i]) - return false; - } - return true; + return ((pgnum << sp_bits) + pagesize > poolsize); } -static bool check_subpage(void *pool, unsigned long poolsize, - unsigned long page) +static bool check_bucket(struct header *head, + unsigned long poolsize, + unsigned long pages[], + struct bucket_state *bs, + unsigned int bindex) { - unsigned long *mhoff = metadata_off(pool, page); - - if (*mhoff + sizeof(struct metaheader) > poolsize) - return false; - - if (*mhoff % ALIGNOF(struct metaheader) != 0) - return false; - - /* It must point to a metadata page. */ - if (!is_metadata_page(pool, poolsize, *mhoff / getpagesize())) - return false; - - /* Header at start of subpage allocation */ - switch (get_bit_pair((uint8_t *)pool + *mhoff, 0)) { - case BITMAP: - return check_bitmap_metadata(pool, mhoff); - case UNIFORM: - return check_uniform_metadata(pool, mhoff); - default: - return false; + bool lp_bucket; + struct page_header *ph; + unsigned long taken, i, prev, pagesize, sp_bits, lp_bits; + + sp_bits = small_page_bits(poolsize); + lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE; + + lp_bucket = large_page_bucket(bindex, sp_bits); + + pagesize = 1UL << (lp_bucket ? lp_bits : sp_bits); + + /* This many elements fit? */ + taken = page_header_size(bindex / INTER_BUCKET_SPACE, + bs->elements_per_page); + taken += bucket_to_size(bindex) * bs->elements_per_page; + if (taken > pagesize) + return check_fail(); + + /* One more wouldn't fit? */ + taken = page_header_size(bindex / INTER_BUCKET_SPACE, + bs->elements_per_page + 1); + taken += bucket_to_size(bindex) * (bs->elements_per_page + 1); + if (taken <= pagesize) + return check_fail(); + + /* Walk used list. */ + prev = 0; + for (i = bs->page_list; i; i = ph->next) { + /* Bad pointer? */ + if (out_of_bounds(i, sp_bits, pagesize, poolsize)) + return check_fail(); + /* Wrong size page? */ + if (!!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE) + != lp_bucket) + return check_fail(); + /* Large page not on boundary? */ + if (lp_bucket && (i % SMALL_PAGES_PER_LARGE_PAGE) != 0) + return check_fail(); + ph = from_pgnum(head, i, sp_bits); + /* Linked list corrupt? */ + if (ph->prev != prev) + return check_fail(); + /* Already seen this page? */ + if (test_bit(pages, i)) + return check_fail(); + set_bit(pages, i); + /* Empty or full? */ + if (ph->elements_used == 0) + return check_fail(); + if (ph->elements_used >= bs->elements_per_page) + return check_fail(); + /* Used bits don't agree? */ + if (ph->elements_used != count_bits(ph->used, + bs->elements_per_page)) + return check_fail(); + /* Wrong bucket? */ + if (ph->bucket != bindex) + return check_fail(); + prev = i; } + /* Walk full list. */ + prev = 0; + for (i = bs->full_list; i; i = ph->next) { + /* Bad pointer? */ + if (out_of_bounds(i, sp_bits, pagesize, poolsize)) + return check_fail(); + /* Wrong size page? */ + if (!!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE) + != lp_bucket) + /* Large page not on boundary? */ + if (lp_bucket && (i % SMALL_PAGES_PER_LARGE_PAGE) != 0) + return check_fail(); + ph = from_pgnum(head, i, sp_bits); + /* Linked list corrupt? */ + if (ph->prev != prev) + return check_fail(); + /* Already seen this page? */ + if (test_bit(pages, i)) + return check_fail(); + set_bit(pages, i); + /* Not full? */ + if (ph->elements_used != bs->elements_per_page) + return check_fail(); + /* Used bits don't agree? */ + if (ph->elements_used != count_bits(ph->used, + bs->elements_per_page)) + return check_fail(); + /* Wrong bucket? */ + if (ph->bucket != bindex) + return check_fail(); + prev = i; + } + return true; } bool alloc_check(void *pool, unsigned long poolsize) { - unsigned long i; - struct metaheader *mh; - enum alloc_state last_state = FREE; - bool was_metadata = false; - - if (poolsize < MIN_SIZE) - return true; - - if (get_page_state(pool, 0) != TAKEN_START) - return false; - - /* First check metadata pages. */ - /* Metadata pages will be marked TAKEN. */ - for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){ - unsigned long start, end; - - start = pool_offset(pool, mh); - if (start + sizeof(*mh) > poolsize) - return false; + struct header *head = pool; + unsigned long prev, i, lp_bits, sp_bits, header_size, num_buckets; + struct page_header *ph; + struct huge_alloc *ha; + unsigned long pages[MAX_SMALL_PAGES / BITS_PER_LONG] = { 0 }; + + if (poolsize < MIN_USEFUL_SIZE) + return tiny_alloc_check(pool, poolsize); + + sp_bits = small_page_bits(poolsize); + lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE; + + num_buckets = max_bucket(lp_bits); + + header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1); + + /* First, set all bits taken by header. */ + for (i = 0; i < header_size; i += (1UL << sp_bits)) + set_bit(pages, i >> sp_bits); + + /* Check small page free list. */ + prev = 0; + for (i = head->small_free_list; i; i = ph->next) { + /* Bad pointer? */ + if (out_of_bounds(i, sp_bits, 1UL << sp_bits, poolsize)) + return check_fail(); + /* Large page? */ + if (test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE)) + return check_fail(); + ph = from_pgnum(head, i, sp_bits); + /* Linked list corrupt? */ + if (ph->prev != prev) + return check_fail(); + /* Already seen this page? */ + if (test_bit(pages, i)) + return check_fail(); + set_bit(pages, i); + prev = i; + } - end = pool_offset(pool, (char *)(mh+1) - + get_metalen(pool, poolsize, mh)); - if (end > poolsize) - return false; + /* Check large page free list. */ + prev = 0; + for (i = head->large_free_list; i; i = ph->next) { + /* Bad pointer? */ + if (out_of_bounds(i, sp_bits, 1UL << lp_bits, poolsize)) + return check_fail(); + /* Not large page? */ + if (!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE)) + return check_fail(); + /* Not page boundary? */ + if ((i % SMALL_PAGES_PER_LARGE_PAGE) != 0) + return check_fail(); + ph = from_pgnum(head, i, sp_bits); + /* Linked list corrupt? */ + if (ph->prev != prev) + return check_fail(); + /* Already seen this page? */ + if (test_bit(pages, i)) + return check_fail(); + set_bit(pages, i); + prev = i; + } - /* Non-first pages should start on a page boundary. */ - if (mh != first_mheader(pool, poolsize) - && start % getpagesize() != 0) - return false; + /* Check the buckets. */ + for (i = 0; i < max_bucket(lp_bits); i++) { + struct bucket_state *bs = &head->bs[i]; - /* It should end on a page boundary. */ - if (end % getpagesize() != 0) + if (!check_bucket(head, poolsize, pages, bs, i)) return false; } - for (i = 0; i < poolsize / getpagesize(); i++) { - enum alloc_state state = get_page_state(pool, i); - bool is_metadata = is_metadata_page(pool, poolsize,i); + /* Check the huge alloc list. */ + prev = 0; + for (i = head->huge; i; i = ha->next) { + unsigned long pgbits, j; + + /* Bad pointer? */ + if (i >= poolsize || i + sizeof(*ha) > poolsize) + return check_fail(); + ha = (void *)((char *)head + i); + + /* Check contents of ha. */ + if (ha->off > poolsize || ha->off + ha->len > poolsize) + return check_fail(); + + /* Large or small page? */ + pgbits = test_bit(head->pagesize, ha->off >> lp_bits) + ? lp_bits : sp_bits; + + /* Not page boundary? */ + if ((ha->off % (1UL << pgbits)) != 0) + return check_fail(); + + /* Not page length? */ + if ((ha->len % (1UL << pgbits)) != 0) + return check_fail(); + + /* Linked list corrupt? */ + if (ha->prev != prev) + return check_fail(); + + for (j = ha->off; j < ha->off + ha->len; j += (1UL<> sp_bits)) + return check_fail(); + set_bit(pages, j >> sp_bits); + } - switch (state) { - case FREE: - /* metadata pages are never free. */ - if (is_metadata) - return false; - case TAKEN_START: - break; - case TAKEN: - /* This should continue a previous block. */ - if (last_state == FREE) - return false; - if (is_metadata != was_metadata) - return false; - break; - case SPECIAL: - /* Check metadata pointer etc. */ - if (!check_subpage(pool, poolsize, i)) - return false; + prev = i; + } + + /* Make sure every page accounted for. */ + for (i = 0; i < poolsize >> sp_bits; i++) { + if (!test_bit(pages, i)) + return check_fail(); + if (test_bit(head->pagesize, + i >> BITS_FROM_SMALL_TO_LARGE_PAGE)) { + /* Large page, skip rest. */ + i += SMALL_PAGES_PER_LARGE_PAGE - 1; } - last_state = state; - was_metadata = is_metadata; } + return true; } -void alloc_visualize(FILE *out, void *pool, unsigned long poolsize) +static unsigned long print_overhead(FILE *out, const char *desc, + unsigned long bytes, + unsigned long poolsize) { - struct metaheader *mh; - struct uniform_cache *uc = pool; - unsigned long pagebitlen, metadata_pages, count[1<size[i] != 0); - fprintf(out, "Uniform cache (%lu entries):\n", tot); - for (i = 0; i < UNIFORM_CACHE_NUM; i++) { - unsigned int j, total = 0; - uint8_t *meta; - - if (!uc->size[i]) - continue; - - /* First two bytes are header. */ - meta = get_page_metadata(pool, uc->page[i]) + 2; - - for (j = 0; j < SUBPAGE_METAOFF / uc->size[i]; j++) - if (meta[j / 8] & (1 << (j % 8))) - total++; - - printf(" %u: %u/%zu (%zu%% density)\n", - uc->size[j], total, SUBPAGE_METAOFF / uc->size[i], - (total * 100) / (SUBPAGE_METAOFF / uc->size[i])); - } - - memset(count, 0, sizeof(count)); - for (i = 0; i < poolsize / getpagesize(); i++) - count[get_page_state(pool, i)]++; - - mh = first_mheader(pool, poolsize); - pagebitlen = (uint8_t *)mh - get_page_statebits(pool); - fprintf(out, "%lu bytes of page bits: FREE/TAKEN/TAKEN_START/SUBPAGE = %lu/%lu/%lu/%lu\n", - pagebitlen, count[0], count[1], count[2], count[3]); - - /* One metadata page for every page of page bits. */ - metadata_pages = div_up(pagebitlen, getpagesize()); - - /* Now do each metadata page. */ - for (; mh; mh = next_mheader(pool,mh)) { - unsigned long free = 0, bitmapblocks = 0, uniformblocks = 0, - len = 0, uniformlen = 0, bitmaplen = 0, metalen; - uint8_t *meta = (uint8_t *)(mh + 1); - - metalen = get_metalen(pool, poolsize, mh); - metadata_pages += (sizeof(*mh) + metalen) / getpagesize(); - - for (i = 0; i < metalen * METADATA_PER_BYTE; i += len) { - switch (get_bit_pair(meta, i)) { - case FREE: - len = 1; - free++; - break; - case BITMAP: - /* Skip over this allocated part. */ - len = BITMAP_METALEN * CHAR_BIT; - bitmapblocks++; - bitmaplen += len; - break; - case UNIFORM: - /* Skip over this part. */ - len = decode_usize(meta + i/METADATA_PER_BYTE); - len = uniform_metalen(len) * METADATA_PER_BYTE; - uniformblocks++; - uniformlen += len; - break; - default: - assert(0); - } - } + fprintf(out, "Overhead (%s): %lu bytes (%.3g%%)\n", + desc, bytes, 100.0 * bytes / poolsize); + return bytes; +} - fprintf(out, "Metadata %lu-%lu: %lu free, %lu bitmapblocks, %lu uniformblocks, %lu%% density\n", - pool_offset(pool, mh), - pool_offset(pool, (char *)(mh+1) + metalen), - free, bitmapblocks, uniformblocks, - (bitmaplen + uniformlen) * 100 - / (free + bitmaplen + uniformlen)); +static unsigned long count_list(struct header *head, + u16 pgnum, + unsigned int sp_bits, + unsigned long *total_elems) +{ + struct page_header *p; + unsigned long ret = 0; + + while (pgnum) { + p = from_pgnum(head, pgnum, sp_bits); + if (total_elems) + (*total_elems) += p->elements_used; + ret++; + pgnum = p->next; } + return ret; +} - /* Account for total pages allocated. */ - tot = (count[1] + count[2] - metadata_pages) * getpagesize(); +static unsigned long visualize_bucket(FILE *out, struct header *head, + unsigned int bucket, + unsigned long poolsize, + unsigned int sp_bits) +{ + unsigned long num_full, num_partial, num_pages, page_size, + elems, hdr_min, hdr_size, elems_per_page, overhead = 0; + + elems_per_page = head->bs[bucket].elements_per_page; + + /* If we used byte-based bitmaps, we could get pg hdr to: */ + hdr_min = sizeof(struct page_header) + - sizeof(((struct page_header *)0)->used) + + align_up(elems_per_page, CHAR_BIT) / CHAR_BIT; + hdr_size = page_header_size(bucket / INTER_BUCKET_SPACE, + elems_per_page); + + elems = 0; + num_full = count_list(head, head->bs[bucket].full_list, sp_bits, + &elems); + num_partial = count_list(head, head->bs[bucket].page_list, sp_bits, + &elems); + num_pages = num_full + num_partial; + if (!num_pages) + return 0; - fprintf(out, "Total metadata bytes = %lu\n", - metadata_pages * getpagesize()); + fprintf(out, "Bucket %u (%lu bytes):" + " %lu full, %lu partial = %lu elements\n", + bucket, bucket_to_size(bucket), num_full, num_partial, elems); + /* Strict requirement of page header size. */ + overhead += print_overhead(out, "page headers", + hdr_min * num_pages, poolsize); + /* Gap between minimal page header and actual start. */ + overhead += print_overhead(out, "page post-header alignments", + (hdr_size - hdr_min) * num_pages, poolsize); + /* Between last element and end of page. */ + page_size = (1UL << sp_bits); + if (large_page_bucket(bucket, sp_bits)) + page_size <<= BITS_FROM_SMALL_TO_LARGE_PAGE; + + overhead += print_overhead(out, "page tails", + (page_size - (hdr_size + + (elems_per_page + * bucket_to_size(bucket)))) + * num_pages, poolsize); + return overhead; +} - /* Now do every subpage. */ - for (i = 0; i < poolsize / getpagesize(); i++) { - uint8_t *meta; - unsigned int j, allocated; - enum sub_metadata_type type; +void alloc_visualize(FILE *out, void *pool, unsigned long poolsize) +{ + struct header *head = pool; + unsigned long i, lp_bits, sp_bits, header_size, num_buckets, count, + overhead = 0; - if (get_page_state(pool, i) != SPECIAL) - continue; + fprintf(out, "Pool %p size %lu: (%s allocator)\n", pool, poolsize, + poolsize < MIN_USEFUL_SIZE ? "tiny" : "standard"); - memset(count, 0, sizeof(count)); - - meta = get_page_metadata(pool, i); - type = get_bit_pair(meta, 0); - - if (type == BITMAP) { - for (j = 0; j < SUBPAGE_METAOFF/BITMAP_GRANULARITY; j++) - count[get_page_state(meta, j)]++; - allocated = (count[1] + count[2]) * BITMAP_GRANULARITY; - fprintf(out, "Subpage bitmap "); - } else { - unsigned int usize = decode_usize(meta); - - assert(type == UNIFORM); - fprintf(out, "Subpage uniform (%u) ", usize); - meta += 2; - for (j = 0; j < SUBPAGE_METAOFF / usize; j++) - count[!!(meta[j / 8] & (1 << (j % 8)))]++; - allocated = count[1] * usize; - } - fprintf(out, "%lu: FREE/TAKEN/TAKEN_START = %lu/%lu/%lu %u%% density\n", - i, count[0], count[1], count[2], - allocated * 100 / getpagesize()); - tot += allocated; + if (poolsize < MIN_USEFUL_SIZE) { + tiny_alloc_visualize(out, pool, poolsize); + return; } - - /* This is optimistic, since we overalloc in several cases. */ - fprintf(out, "Best possible allocation density = %lu%%\n", - tot * 100 / poolsize); + + sp_bits = small_page_bits(poolsize); + lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE; + + num_buckets = max_bucket(lp_bits); + header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1); + + fprintf(out, "Large page size %lu, small page size %lu.\n", + 1UL << lp_bits, 1UL << sp_bits); + overhead += print_overhead(out, "unused pool tail", + poolsize % (1UL << lp_bits), poolsize); + fprintf(out, "Main header %lu bytes (%lu small pages).\n", + header_size, align_up(header_size, 1UL << sp_bits) >> sp_bits); + overhead += print_overhead(out, "partial header page", + align_up(header_size, 1UL << sp_bits) + - header_size, poolsize); + /* Total large pages. */ + i = count_bits(head->pagesize, poolsize >> lp_bits); + /* Used pages. */ + count = i - count_list(head, head->large_free_list, sp_bits, NULL); + fprintf(out, "%lu/%lu large pages used (%.3g%%)\n", + count, i, count ? 100.0 * count / i : 0.0); + + /* Total small pages. */ + i = ((poolsize >> lp_bits) - i) << BITS_FROM_SMALL_TO_LARGE_PAGE; + /* Used pages */ + count = i - count_list(head, head->small_free_list, sp_bits, NULL); + fprintf(out, "%lu/%lu small pages used (%.3g%%)\n", + count, i, count ? 100.0 * count / i : 0.0); + + /* Summary of each bucket. */ + fprintf(out, "%lu buckets:\n", num_buckets); + for (i = 0; i < num_buckets; i++) + overhead += visualize_bucket(out, head, i, poolsize, sp_bits); + + print_overhead(out, "total", overhead, poolsize); }