#include <assert.h>
#include <stdlib.h>
#include "alloc.h"
-#include "build_assert/build_assert.h"
-#include "alignof/alignof.h"
+#include "bitops.h"
+#include "tiny.h"
+#include <ccan/build_assert/build_assert.h>
+#include <ccan/likely/likely.h>
+#include <ccan/short_types/short_types.h>
#include "config.h"
-/* FIXME: We assume getpagesize() doesnt change. Remapping file with
- * different pagesize should still work. */
+/*
+ Inspired by (and parts taken from) Andrew Tridgell's alloc_mmap:
+ http://samba.org/~tridge/junkcode/alloc_mmap/
+
+ Copyright (C) Andrew Tridgell 2007
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
-/* FIXME: Doesn't handle non-page-aligned poolsize. */
+#if 0 /* Until we have the tiny allocator working, go down to 64k */
-/* FIXME: Reduce. */
-#define MIN_SIZE (getpagesize() * 2)
+/* We divide the pool into this many large pages (nearest power of 2) */
+#define MAX_LARGE_PAGES (1024UL)
-/* What's the granularity of sub-page allocs? */
-#define BITMAP_GRANULARITY 4
+/* 32 small pages == 1 large page. */
+#define BITS_FROM_SMALL_TO_LARGE_PAGE 5
-/* File layout:
- *
- * file := pagestates pad uniform-cache metadata
- * pagestates := pages * 2-bits-per-page
- * pad := pad to next ALIGNOF(metaheader)
- *
- * metadata := metalen next-ptr metabits
- * metabits := freeblock | bitblock | uniformblock
- * freeblock := FREE +
- * bitblock := BITMAP + 2-bits-per-bit-in-page + pad-to-byte
- * uniformblock := UNIFORM + 14-bit-byte-len + bits + pad-to-byte
- */
-#define UNIFORM_CACHE_NUM 16
-struct uniform_cache
-{
- uint16_t size[UNIFORM_CACHE_NUM];
- /* These could be u32 if we're prepared to limit size. */
- unsigned long page[UNIFORM_CACHE_NUM];
-};
+#else
-struct metaheader
-{
- /* Next meta header, or 0 */
- unsigned long next;
- /* Bits start here. */
-};
+#define MAX_LARGE_PAGES (128UL)
+#define BITS_FROM_SMALL_TO_LARGE_PAGE 4
-/* Assumes a is a power of two. */
-static unsigned long align_up(unsigned long x, unsigned long a)
-{
- return (x + a - 1) & ~(a - 1);
-}
+#endif
-static unsigned long align_down(unsigned long x, unsigned long a)
-{
- return x & ~(a - 1);
-}
+#define MAX_SMALL_PAGES (MAX_LARGE_PAGES << BITS_FROM_SMALL_TO_LARGE_PAGE)
-static unsigned long div_up(unsigned long x, unsigned long a)
-{
- return (x + a - 1) / a;
-}
+/* Smallest pool size for this scheme: 128-byte small pages. That's
+ * 9/13% overhead for 32/64 bit. */
+#define MIN_USEFUL_SIZE (MAX_SMALL_PAGES * 128)
-/* It turns out that we spend a lot of time dealing with bit pairs.
- * These routines manipulate them.
- */
-static uint8_t get_bit_pair(const uint8_t *bits, unsigned long index)
-{
- return bits[index * 2 / CHAR_BIT] >> (index * 2 % CHAR_BIT) & 3;
-}
+/* Every 4 buckets, we jump up a power of 2. ...8 10 12 14 16 20 24 28 32... */
+#define INTER_BUCKET_SPACE 4
-static void set_bit_pair(uint8_t *bits, unsigned long index, uint8_t val)
-{
- bits[index * 2 / CHAR_BIT] &= ~(3 << (index * 2 % CHAR_BIT));
- bits[index * 2 / CHAR_BIT] |= (val << (index * 2 % CHAR_BIT));
-}
+#define SMALL_PAGES_PER_LARGE_PAGE (1 << BITS_FROM_SMALL_TO_LARGE_PAGE)
-/* This is used for page states and subpage allocations */
-enum alloc_state
-{
- FREE,
- TAKEN,
- TAKEN_START,
- SPECIAL, /* Sub-page allocation for page states. */
-};
+/* FIXME: Figure this out properly. */
+#define MAX_SIZE (1 << 30)
-/* The types for subpage metadata. */
-enum sub_metadata_type
-{
- /* FREE is same as alloc state */
- BITMAP = 1, /* bitmap allocated page */
- UNIFORM, /* uniform size allocated page */
+/* How few object to fit in a page before using a larger one? (8) */
+#define MAX_PAGE_OBJECT_ORDER 3
+
+#define BITS_PER_LONG (sizeof(long) * CHAR_BIT)
+
+struct bucket_state {
+ u32 elements_per_page;
+ u16 page_list;
+ u16 full_list;
};
-/* Page states are represented by bitpairs, at the start of the pool. */
-#define BITS_PER_PAGE 2
+struct header {
+ /* Bitmap of which pages are large. */
+ unsigned long pagesize[MAX_LARGE_PAGES / BITS_PER_LONG];
+
+ /* List of unused small/large pages. */
+ u16 small_free_list;
+ u16 large_free_list;
-/* How much metadata info per byte? */
-#define METADATA_PER_BYTE (CHAR_BIT / 2)
+ /* This is less defined: we have two buckets for each power of 2 */
+ struct bucket_state bs[1];
+};
-static uint8_t *get_page_statebits(const void *pool)
+struct page_header {
+ u16 next, prev;
+ /* FIXME: We can just count all-0 and all-1 used[] elements. */
+ unsigned elements_used : 25;
+ unsigned bucket : 7;
+ unsigned long used[1]; /* One bit per element. */
+};
+
+/*
+ * Every 4 buckets, the size doubles.
+ * Between buckets, sizes increase linearly.
+ *
+ * eg. bucket 40 = 2^10 = 1024
+ * bucket 41 = 2^10 + 2^10*4 = 1024 + 256
+ * bucket 42 = 2^10 + 2^10*4 = 1024 + 512
+ * bucket 43 = 2^10 + 2^10*4 = 1024 + 768
+ * bucket 45 = 2^11 = 2048
+ *
+ * Care is taken to handle low numbered buckets, at cost of overflow.
+ */
+static unsigned long bucket_to_size(unsigned int bucket)
{
- return (uint8_t *)pool + sizeof(struct uniform_cache);
+ unsigned long base = 1 << (bucket / INTER_BUCKET_SPACE);
+ return base + ((bucket % INTER_BUCKET_SPACE)
+ << (bucket / INTER_BUCKET_SPACE))
+ / INTER_BUCKET_SPACE;
}
-static enum alloc_state get_page_state(const void *pool, unsigned long page)
+/*
+ * Say size is 10.
+ * fls(size/2) == 3. 1 << 3 == 8, so we're 2 too large, out of a possible
+ * 8 too large. That's 1/4 of the way to the next power of 2 == 1 bucket.
+ *
+ * We make sure we round up. Note that this fails on 32 bit at size
+ * 1879048193 (around bucket 120).
+ */
+static unsigned int size_to_bucket(unsigned long size)
{
- return get_bit_pair(get_page_statebits(pool), page);
+ unsigned int base = fls(size/2);
+ unsigned long overshoot;
+
+ overshoot = size - (1 << base);
+ return base * INTER_BUCKET_SPACE
+ + ((overshoot * INTER_BUCKET_SPACE + (1 << base)-1) >> base);
}
-static void set_page_state(void *pool, unsigned long page, enum alloc_state s)
+static unsigned int small_page_bits(unsigned long poolsize)
{
- set_bit_pair(get_page_statebits(pool), page, s);
+ return fls(poolsize / MAX_SMALL_PAGES / 2);
}
-/* The offset of metadata for a subpage allocation is found at the end
- * of the subpage */
-#define SUBPAGE_METAOFF (getpagesize() - sizeof(unsigned long))
-
-/* This is the length of metadata in bits. It consists of two bits
- * for every BITMAP_GRANULARITY of usable bytes in the page, then two
- * bits for the tailer.. */
-#define BITMAP_METABITLEN \
- ((div_up(SUBPAGE_METAOFF, BITMAP_GRANULARITY) + 1) * BITS_PER_PAGE)
-
-/* This is the length in bytes. */
-#define BITMAP_METALEN (div_up(BITMAP_METABITLEN, CHAR_BIT))
-
-static struct metaheader *first_mheader(void *pool, unsigned long poolsize)
+static struct page_header *from_pgnum(struct header *head,
+ unsigned long pgnum,
+ unsigned sp_bits)
{
- unsigned int pagestatelen;
-
- pagestatelen = align_up(div_up(poolsize/getpagesize() * BITS_PER_PAGE,
- CHAR_BIT),
- ALIGNOF(struct metaheader));
- return (struct metaheader *)(get_page_statebits(pool) + pagestatelen);
+ return (struct page_header *)((char *)head + (pgnum << sp_bits));
}
-static struct metaheader *next_mheader(void *pool, struct metaheader *mh)
+static u16 to_pgnum(struct header *head, void *p, unsigned sp_bits)
{
- if (!mh->next)
- return NULL;
-
- return (struct metaheader *)((char *)pool + mh->next);
+ return ((char *)p - (char *)head) >> sp_bits;
}
-static unsigned long pool_offset(void *pool, void *p)
+static size_t used_size(unsigned int num_elements)
{
- return (char *)p - (char *)pool;
+ return align_up(num_elements, BITS_PER_LONG) / CHAR_BIT;
}
-void alloc_init(void *pool, unsigned long poolsize)
+/*
+ * We always align the first entry to the lower power of 2.
+ * eg. the 12-byte bucket gets 8-byte aligned. The 4096-byte bucket
+ * gets 4096-byte aligned.
+ */
+static unsigned long page_header_size(unsigned int align_bits,
+ unsigned long num_elements)
{
- /* FIXME: Alignment assumptions about pool. */
- unsigned long len, i;
- struct metaheader *mh;
-
- if (poolsize < MIN_SIZE)
- return;
-
- mh = first_mheader(pool, poolsize);
+ unsigned long size;
- /* Mark all page states FREE, all uniform caches zero, and all of
- * metaheader bitmap which takes rest of first page. */
- len = align_up(pool_offset(pool, mh + 1), getpagesize());
- BUILD_ASSERT(FREE == 0);
- memset(pool, 0, len);
-
- /* Mark the pagestate and metadata page(s) allocated. */
- set_page_state(pool, 0, TAKEN_START);
- for (i = 1; i < div_up(len, getpagesize()); i++)
- set_page_state(pool, i, TAKEN);
+ size = sizeof(struct page_header)
+ - sizeof(((struct page_header *)0)->used)
+ + used_size(num_elements);
+ return align_up(size, 1 << align_bits);
}
-/* Two bits per element, representing page states. Returns 0 on fail.
- * off is used to allocate from subpage bitmaps, which use the first 2
- * bits as the type, so the real bitmap is offset by 1. */
-static unsigned long alloc_from_bitmap(uint8_t *bits, unsigned long off,
- unsigned long elems,
- unsigned long want, unsigned long align)
+static void add_to_list(struct header *head,
+ u16 *list, struct page_header *ph, unsigned sp_bits)
{
- long i;
- unsigned long free;
-
- free = 0;
- /* We allocate from far end, to increase ability to expand metadata. */
- for (i = elems - 1; i >= 0; i--) {
- switch (get_bit_pair(bits, off+i)) {
- case FREE:
- if (++free >= want) {
- unsigned long j;
-
- /* They might ask for large alignment. */
- if (align && i % align)
- continue;
-
- set_bit_pair(bits, off+i, TAKEN_START);
- for (j = i+1; j < i + want; j++)
- set_bit_pair(bits, off+j, TAKEN);
- return off+i;
- }
- break;
- case SPECIAL:
- case TAKEN_START:
- case TAKEN:
- free = 0;
- break;
- }
- }
+ unsigned long h = *list, offset = to_pgnum(head, ph, sp_bits);
- return 0;
+ ph->next = h;
+ if (h) {
+ struct page_header *prev = from_pgnum(head, h, sp_bits);
+ assert(prev->prev == 0);
+ prev->prev = offset;
+ }
+ *list = offset;
+ ph->prev = 0;
}
-static unsigned long alloc_get_pages(void *pool, unsigned long poolsize,
- unsigned long pages, unsigned long align)
+static void del_from_list(struct header *head,
+ u16 *list, struct page_header *ph, unsigned sp_bits)
{
- return alloc_from_bitmap(get_page_statebits(pool),
- 0, poolsize / getpagesize(), pages,
- align / getpagesize());
+ /* Front of list? */
+ if (ph->prev == 0) {
+ *list = ph->next;
+ } else {
+ struct page_header *prev = from_pgnum(head, ph->prev, sp_bits);
+ prev->next = ph->next;
+ }
+ if (ph->next != 0) {
+ struct page_header *next = from_pgnum(head, ph->next, sp_bits);
+ next->prev = ph->prev;
+ }
}
-/* Offset to metadata is at end of page. */
-static unsigned long *metadata_off(void *pool, unsigned long page)
+static u16 pop_from_list(struct header *head,
+ u16 *list,
+ unsigned int sp_bits)
{
- return (unsigned long *)
- ((char *)pool + (page+1)*getpagesize() - sizeof(unsigned long));
-}
+ u16 h = *list;
+ struct page_header *ph = from_pgnum(head, h, sp_bits);
-static uint8_t *get_page_metadata(void *pool, unsigned long page)
-{
- return (uint8_t *)pool + *metadata_off(pool, page);
+ if (likely(h)) {
+ *list = ph->next;
+ if (*list)
+ from_pgnum(head, *list, sp_bits)->prev = 0;
+ }
+ return h;
}
-static void set_page_metadata(void *pool, unsigned long page, uint8_t *meta)
+static void add_small_page_to_freelist(struct header *head,
+ struct page_header *ph,
+ unsigned int sp_bits)
{
- *metadata_off(pool, page) = meta - (uint8_t *)pool;
+ add_to_list(head, &head->small_free_list, ph, sp_bits);
}
-static unsigned long sub_page_alloc(void *pool, unsigned long page,
- unsigned long size, unsigned long align)
+static void add_large_page_to_freelist(struct header *head,
+ struct page_header *ph,
+ unsigned int sp_bits)
{
- uint8_t *bits = get_page_metadata(pool, page);
- unsigned long i;
- enum sub_metadata_type type;
-
- type = get_bit_pair(bits, 0);
-
- /* If this is a uniform page, we can't allocate from it. */
- if (type == UNIFORM)
- return 0;
-
- assert(type == BITMAP);
-
- /* We use a standart bitmap, but offset because of that BITMAP
- * header. */
- i = alloc_from_bitmap(bits, 1, SUBPAGE_METAOFF/BITMAP_GRANULARITY,
- div_up(size, BITMAP_GRANULARITY),
- align / BITMAP_GRANULARITY);
-
- /* Can't allocate? */
- if (i == 0)
- return 0;
-
- /* i-1 because of the header. */
- return page*getpagesize() + (i-1)*BITMAP_GRANULARITY;
+ add_to_list(head, &head->large_free_list, ph, sp_bits);
}
-/* We look at the page states to figure out where the allocation for this
- * metadata ends. */
-static unsigned long get_metalen(void *pool, unsigned long poolsize,
- struct metaheader *mh)
+static void add_to_bucket_list(struct header *head,
+ struct bucket_state *bs,
+ struct page_header *ph,
+ unsigned int sp_bits)
{
- unsigned long i, first, pages = poolsize / getpagesize();
-
- first = pool_offset(pool, mh + 1)/getpagesize();
-
- for (i = first + 1; i < pages && get_page_state(pool,i) == TAKEN; i++);
-
- return i * getpagesize() - pool_offset(pool, mh + 1);
+ add_to_list(head, &bs->page_list, ph, sp_bits);
}
-static unsigned int uniform_metalen(unsigned int usize)
+static void del_from_bucket_list(struct header *head,
+ struct bucket_state *bs,
+ struct page_header *ph,
+ unsigned int sp_bits)
{
- unsigned int metalen;
-
- assert(usize < (1 << 14));
-
- /* Two bits for the header, 14 bits for size, then one bit for each
- * element the page can hold. Round up to number of bytes. */
- metalen = div_up(2 + 14 + SUBPAGE_METAOFF / usize, CHAR_BIT);
-
- /* To ensure metaheader is always aligned, round bytes up. */
- metalen = align_up(metalen, ALIGNOF(struct metaheader));
-
- return metalen;
+ del_from_list(head, &bs->page_list, ph, sp_bits);
}
-static unsigned int decode_usize(uint8_t *meta)
+static void del_from_bucket_full_list(struct header *head,
+ struct bucket_state *bs,
+ struct page_header *ph,
+ unsigned int sp_bits)
{
- return ((unsigned)meta[1] << (CHAR_BIT-2)) | (meta[0] >> 2);
+ del_from_list(head, &bs->full_list, ph, sp_bits);
}
-static void encode_usize(uint8_t *meta, unsigned int usize)
+static void add_to_bucket_full_list(struct header *head,
+ struct bucket_state *bs,
+ struct page_header *ph,
+ unsigned int sp_bits)
{
- meta[0] = (UNIFORM | (usize << 2));
- meta[1] = (usize >> (CHAR_BIT - 2));
+ add_to_list(head, &bs->full_list, ph, sp_bits);
}
-static uint8_t *alloc_metaspace(void *pool, unsigned long poolsize,
- struct metaheader *mh, unsigned long bytes,
- enum sub_metadata_type type)
+static void clear_bit(unsigned long bitmap[], unsigned int off)
{
- uint8_t *meta = (uint8_t *)(mh + 1);
- unsigned long free = 0, len, i, metalen;
-
- metalen = get_metalen(pool, poolsize, mh);
-
- /* Walk through metadata looking for free. */
- for (i = 0; i < metalen * METADATA_PER_BYTE; i += len) {
- switch (get_bit_pair(meta, i)) {
- case FREE:
- len = 1;
- free++;
- if (free == bytes * METADATA_PER_BYTE) {
- /* Mark this as a bitmap. */
- set_bit_pair(meta, i - free + 1, type);
- return meta + (i - free + 1)/METADATA_PER_BYTE;
- }
- break;
- case BITMAP:
- /* Skip over this allocated part. */
- len = BITMAP_METALEN * METADATA_PER_BYTE;
- free = 0;
- break;
- case UNIFORM:
- /* Figure metalen given usize. */
- len = decode_usize(meta + i / METADATA_PER_BYTE);
- len = uniform_metalen(len) * METADATA_PER_BYTE;
- free = 0;
- break;
- default:
- assert(0);
- return NULL;
- }
- }
- return NULL;
+ bitmap[off / BITS_PER_LONG] &= ~(1 << (off % BITS_PER_LONG));
}
-/* We need this many bytes of metadata. */
-static uint8_t *new_metadata(void *pool, unsigned long poolsize,
- unsigned long bytes, enum sub_metadata_type type)
+static bool test_bit(const unsigned long bitmap[], unsigned int off)
{
- struct metaheader *mh, *newmh;
- unsigned long page;
- uint8_t *meta;
-
- for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh))
- if ((meta = alloc_metaspace(pool, poolsize, mh, bytes, type)))
- return meta;
-
- /* No room for metadata? Can we expand an existing one? */
- for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){
- unsigned long nextpage;
-
- /* We start on this page. */
- nextpage = pool_offset(pool, (char *)(mh+1))/getpagesize();
- /* Iterate through any other pages we own. */
- while (get_page_state(pool, ++nextpage) == TAKEN);
-
- /* Now, can we grab that page? */
- if (get_page_state(pool, nextpage) != FREE)
- continue;
-
- /* OK, expand metadata, do it again. */
- set_page_state(pool, nextpage, TAKEN);
- BUILD_ASSERT(FREE == 0);
- memset((char *)pool + nextpage*getpagesize(), 0, getpagesize());
- return alloc_metaspace(pool, poolsize, mh, bytes, type);
- }
-
- /* No metadata left at all? */
- page = alloc_get_pages(pool, poolsize, div_up(bytes, getpagesize()), 1);
- if (!page)
- return NULL;
-
- newmh = (struct metaheader *)((char *)pool + page * getpagesize());
- BUILD_ASSERT(FREE == 0);
- memset(newmh + 1, 0, getpagesize() - sizeof(*mh));
-
- /* Sew it into linked list */
- mh = first_mheader(pool,poolsize);
- newmh->next = mh->next;
- mh->next = pool_offset(pool, newmh);
-
- return alloc_metaspace(pool, poolsize, newmh, bytes, type);
+ return bitmap[off / BITS_PER_LONG] & (1 << (off % BITS_PER_LONG));
}
-static void alloc_free_pages(void *pool, unsigned long pagenum)
+static void set_bit(unsigned long bitmap[], unsigned int off)
{
- assert(get_page_state(pool, pagenum) == TAKEN_START);
- set_page_state(pool, pagenum, FREE);
- while (get_page_state(pool, ++pagenum) == TAKEN)
- set_page_state(pool, pagenum, FREE);
+ bitmap[off / BITS_PER_LONG] |= (1 << (off % BITS_PER_LONG));
}
-static void maybe_transform_uniform_page(void *pool, unsigned long offset)
+/* There must be a bit to be found. */
+static unsigned int find_free_bit(const unsigned long bitmap[])
{
- /* FIXME: If possible and page isn't full, change to a bitmap */
+ unsigned int i;
+
+ for (i = 0; bitmap[i] == -1UL; i++);
+ return (i*BITS_PER_LONG) + ffsl(~bitmap[i]) - 1;
}
-/* Returns 0 or the size of the uniform alloc to use */
-static unsigned long suitable_for_uc(unsigned long size, unsigned long align)
+/* How many elements can we fit in a page? */
+static unsigned long elements_per_page(unsigned long align_bits,
+ unsigned long esize,
+ unsigned long psize)
{
- unsigned long num_elems, wastage, usize;
- unsigned long bitmap_cost;
+ unsigned long num, overhead;
- if (size == 0)
- size = 1;
+ /* First approximation: no extra room for bitmap. */
+ overhead = align_up(sizeof(struct page_header), 1 << align_bits);
+ num = (psize - overhead) / esize;
- /* Fix up silly alignments. */
- usize = align_up(size, align);
-
- /* How many can fit in this page? */
- num_elems = SUBPAGE_METAOFF / usize;
-
- /* Can happen with bigger alignments. */
- if (!num_elems)
- return 0;
-
- /* Usize maxes out at 14 bits. */
- if (usize >= (1 << 14))
- return 0;
-
- /* How many bytes would be left at the end? */
- wastage = SUBPAGE_METAOFF % usize;
-
- /* If we can get a larger allocation within alignment constraints, we
- * should do it, otherwise might as well leave wastage at the end. */
- usize += align_down(wastage / num_elems, align);
+ while (page_header_size(align_bits, num) + esize * num > psize)
+ num--;
+ return num;
+}
- /* Bitmap allocation costs 2 bits per BITMAP_GRANULARITY bytes, plus
- * however much we waste in rounding up to BITMAP_GRANULARITY. */
- bitmap_cost = 2 * div_up(size, BITMAP_GRANULARITY)
- + CHAR_BIT * (align_up(size, BITMAP_GRANULARITY) - size);
+static bool large_page_bucket(unsigned int bucket, unsigned int sp_bits)
+{
+ unsigned long max_smallsize;
- /* Our cost is 1 bit, plus usize overhead */
- if (bitmap_cost < 1 + (usize - size) * CHAR_BIT)
- return 0;
+ /* Note: this doesn't take into account page header. */
+ max_smallsize = (1UL << sp_bits) >> MAX_PAGE_OBJECT_ORDER;
- return usize;
+ return bucket_to_size(bucket) > max_smallsize;
}
-static unsigned long uniform_alloc(void *pool, unsigned long poolsize,
- struct uniform_cache *uc,
- unsigned long ucnum)
+static unsigned int max_bucket(unsigned int lp_bits)
{
- uint8_t *metadata = get_page_metadata(pool, uc->page[ucnum]) + 2;
- unsigned long i, max;
-
- /* Simple one-bit-per-object bitmap. */
- max = SUBPAGE_METAOFF / uc->size[ucnum];
- for (i = 0; i < max; i++) {
- if (!(metadata[i / CHAR_BIT] & (1 << (i % CHAR_BIT)))) {
- metadata[i / CHAR_BIT] |= (1 << (i % CHAR_BIT));
- return uc->page[ucnum] * getpagesize()
- + i * uc->size[ucnum];
- }
- }
-
- return 0;
+ return (lp_bits - MAX_PAGE_OBJECT_ORDER) * INTER_BUCKET_SPACE;
}
-static unsigned long new_uniform_page(void *pool, unsigned long poolsize,
- unsigned long usize)
+void alloc_init(void *pool, unsigned long poolsize)
{
- unsigned long page, metalen;
- uint8_t *metadata;
+ struct header *head = pool;
+ struct page_header *ph;
+ unsigned int lp_bits, sp_bits, num_buckets;
+ unsigned long header_size, i;
- page = alloc_get_pages(pool, poolsize, 1, 1);
- if (page == 0)
- return 0;
-
- metalen = uniform_metalen(usize);
-
- /* Get metadata for page. */
- metadata = new_metadata(pool, poolsize, metalen, UNIFORM);
- if (!metadata) {
- alloc_free_pages(pool, page);
- return 0;
+ if (poolsize < MIN_USEFUL_SIZE) {
+ tiny_alloc_init(pool, poolsize);
+ return;
}
- encode_usize(metadata, usize);
+ /* We rely on page numbers fitting in 16 bit. */
+ BUILD_ASSERT(MAX_SMALL_PAGES < 65536);
+
+ sp_bits = small_page_bits(poolsize);
+ lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
- BUILD_ASSERT(FREE == 0);
- memset(metadata + 2, 0, metalen - 2);
+ num_buckets = max_bucket(lp_bits);
- /* Actually, this is a subpage page now. */
- set_page_state(pool, page, SPECIAL);
-
- /* Set metadata pointer for page. */
- set_page_metadata(pool, page, metadata);
-
- return page;
-}
+ head = pool;
+ header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);
-static unsigned long alloc_sub_page(void *pool, unsigned long poolsize,
- unsigned long size, unsigned long align)
-{
- unsigned long i, usize;
- uint8_t *metadata;
- struct uniform_cache *uc = pool;
-
- usize = suitable_for_uc(size, align);
- if (usize) {
- /* Look for a uniform page. */
- for (i = 0; i < UNIFORM_CACHE_NUM; i++) {
- if (uc->size[i] == usize) {
- unsigned long ret;
- ret = uniform_alloc(pool, poolsize, uc, i);
- if (ret != 0)
- return ret;
- /* OK, that one is full, remove from cache. */
- uc->size[i] = 0;
- break;
- }
- }
+ memset(head, 0, header_size);
+ for (i = 0; i < num_buckets; i++) {
+ unsigned long pagesize;
- /* OK, try a new uniform page. Use random discard for now. */
- i = random() % UNIFORM_CACHE_NUM;
- maybe_transform_uniform_page(pool, uc->page[i]);
+ if (large_page_bucket(i, sp_bits))
+ pagesize = 1UL << lp_bits;
+ else
+ pagesize = 1UL << sp_bits;
- uc->page[i] = new_uniform_page(pool, poolsize, usize);
- if (uc->page[i]) {
- uc->size[i] = usize;
- return uniform_alloc(pool, poolsize, uc, i);
- }
- uc->size[i] = 0;
+ head->bs[i].elements_per_page
+ = elements_per_page(i / INTER_BUCKET_SPACE,
+ bucket_to_size(i),
+ pagesize);
}
- /* Look for partial page. */
- for (i = 0; i < poolsize / getpagesize(); i++) {
- unsigned long ret;
- if (get_page_state(pool, i) != SPECIAL)
- continue;
-
- ret = sub_page_alloc(pool, i, size, align);
- if (ret)
- return ret;
+ /* They start as all large pages. */
+ memset(head->pagesize, 0xFF, sizeof(head->pagesize));
+ /* FIXME: small pages for last bit? */
+
+ /* Split first page into small pages. */
+ assert(header_size << (1UL << lp_bits));
+ clear_bit(head->pagesize, 0);
+
+ /* Skip over page(s) used by header, add rest to free list */
+ for (i = align_up(header_size, (1 << sp_bits)) >> sp_bits;
+ i < SMALL_PAGES_PER_LARGE_PAGE;
+ i++) {
+ ph = from_pgnum(head, i, sp_bits);
+ ph->elements_used = 0;
+ add_small_page_to_freelist(head, ph, sp_bits);
}
- /* Create new SUBPAGE page. */
- i = alloc_get_pages(pool, poolsize, 1, 1);
- if (i == 0)
- return 0;
-
- /* Get metadata for page. */
- metadata = new_metadata(pool, poolsize, BITMAP_METALEN, BITMAP);
- if (!metadata) {
- alloc_free_pages(pool, i);
- return 0;
+ /* Add the rest of the pages as large pages. */
+ i = SMALL_PAGES_PER_LARGE_PAGE;
+ while ((i << sp_bits) + (1 << lp_bits) <= poolsize) {
+ ph = from_pgnum(head, i, sp_bits);
+ ph->elements_used = 0;
+ add_large_page_to_freelist(head, ph, sp_bits);
+ i += SMALL_PAGES_PER_LARGE_PAGE;
}
-
- /* Actually, this is a subpage page now. */
- set_page_state(pool, i, SPECIAL);
-
- /* Set metadata pointer for page. */
- set_page_metadata(pool, i, metadata);
-
- /* Do allocation like normal */
- return sub_page_alloc(pool, i, size, align);
}
-static bool bitmap_page_is_empty(uint8_t *meta)
+/* A large page worth of small pages are free: delete them from free list. */
+static void del_large_from_small_free_list(struct header *head,
+ struct page_header *ph,
+ unsigned int sp_bits)
{
- unsigned int i;
-
- /* Skip the header (first bit of metadata). */
- for (i = 1; i < SUBPAGE_METAOFF/BITMAP_GRANULARITY+1; i++)
- if (get_bit_pair(meta, i) != FREE)
- return false;
+ unsigned long i;
- return true;
+ for (i = 0; i < SMALL_PAGES_PER_LARGE_PAGE; i++) {
+ del_from_list(head, &head->small_free_list,
+ (void *)ph + (i << sp_bits),
+ sp_bits);
+ }
}
-static bool uniform_page_is_empty(uint8_t *meta)
+static bool all_empty(struct header *head,
+ unsigned long pgnum,
+ unsigned sp_bits)
{
- unsigned int i, metalen;
-
- metalen = uniform_metalen(decode_usize(meta));
+ unsigned long i;
- /* Skip the header (first two bytes of metadata). */
- for (i = 2; i < metalen + 2; i++) {
- BUILD_ASSERT(FREE == 0);
- if (meta[i])
+ for (i = 0; i < SMALL_PAGES_PER_LARGE_PAGE; i++) {
+ struct page_header *ph = from_pgnum(head, pgnum + i, sp_bits);
+ if (ph->elements_used)
return false;
}
return true;
}
-static bool special_page_is_empty(void *pool, unsigned long page)
+static u16 get_large_page(struct header *head, unsigned long poolsize,
+ unsigned int sp_bits)
{
- uint8_t *meta;
- enum sub_metadata_type type;
-
- meta = get_page_metadata(pool, page);
- type = get_bit_pair(meta, 0);
-
- switch (type) {
- case UNIFORM:
- return uniform_page_is_empty(meta);
- case BITMAP:
- return bitmap_page_is_empty(meta);
- default:
- assert(0);
- }
-}
+ unsigned int lp_bits, i, page;
-static void clear_special_metadata(void *pool, unsigned long page)
-{
- uint8_t *meta;
- enum sub_metadata_type type;
-
- meta = get_page_metadata(pool, page);
- type = get_bit_pair(meta, 0);
-
- switch (type) {
- case UNIFORM:
- /* First two bytes are the header, rest is already FREE */
- BUILD_ASSERT(FREE == 0);
- memset(meta, 0, 2);
- break;
- case BITMAP:
- /* First two bits is the header. */
- BUILD_ASSERT(BITMAP_METALEN > 1);
- meta[0] = 0;
- break;
- default:
- assert(0);
+ lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
+
+ page = pop_from_list(head, &head->large_free_list, sp_bits);
+ if (likely(page))
+ return page;
+
+ /* Look for small pages to coalesce, after first large page. */
+ for (i = SMALL_PAGES_PER_LARGE_PAGE;
+ i < (poolsize >> lp_bits) << BITS_FROM_SMALL_TO_LARGE_PAGE;
+ i += SMALL_PAGES_PER_LARGE_PAGE) {
+ /* Already a large page? */
+ if (test_bit(head->pagesize, i / SMALL_PAGES_PER_LARGE_PAGE))
+ continue;
+ if (all_empty(head, i, sp_bits)) {
+ struct page_header *ph = from_pgnum(head, i, sp_bits);
+ set_bit(head->pagesize,
+ i / SMALL_PAGES_PER_LARGE_PAGE);
+ del_large_from_small_free_list(head, ph, sp_bits);
+ add_large_page_to_freelist(head, ph, sp_bits);
+ }
}
+
+ return pop_from_list(head, &head->large_free_list, sp_bits);
}
-/* Returns true if we cleaned any pages. */
-static bool clean_empty_subpages(void *pool, unsigned long poolsize)
+/* Returns small page. */
+static unsigned long break_up_large_page(struct header *head,
+ unsigned int sp_bits,
+ u16 lpage)
{
- unsigned long i;
- bool progress = false;
+ unsigned int i;
- for (i = 0; i < poolsize/getpagesize(); i++) {
- if (get_page_state(pool, i) != SPECIAL)
- continue;
+ clear_bit(head->pagesize, lpage >> BITS_FROM_SMALL_TO_LARGE_PAGE);
- if (special_page_is_empty(pool, i)) {
- clear_special_metadata(pool, i);
- set_page_state(pool, i, FREE);
- progress = true;
- }
+ for (i = 1; i < SMALL_PAGES_PER_LARGE_PAGE; i++) {
+ struct page_header *ph = from_pgnum(head, lpage + i, sp_bits);
+ add_small_page_to_freelist(head, ph, sp_bits);
}
- return progress;
+
+ return lpage;
}
-/* Returns true if we cleaned any pages. */
-static bool clean_metadata(void *pool, unsigned long poolsize)
+static u16 get_small_page(struct header *head, unsigned long poolsize,
+ unsigned int sp_bits)
{
- struct metaheader *mh, *prev_mh = NULL;
- bool progress = false;
-
- for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){
- uint8_t *meta;
- long i;
- unsigned long metalen = get_metalen(pool, poolsize, mh);
-
- meta = (uint8_t *)(mh + 1);
- BUILD_ASSERT(FREE == 0);
- for (i = metalen - 1; i > 0; i--)
- if (meta[i] != 0)
- break;
-
- /* Completely empty? */
- if (prev_mh && i == metalen) {
- alloc_free_pages(pool,
- pool_offset(pool, mh)/getpagesize());
- prev_mh->next = mh->next;
- mh = prev_mh;
- progress = true;
- } else {
- uint8_t *p;
-
- /* Some pages at end are free? */
- for (p = (uint8_t *)(mh+1) + metalen - getpagesize();
- p > meta + i;
- p -= getpagesize()) {
- set_page_state(pool,
- pool_offset(pool, p)
- / getpagesize(),
- FREE);
- progress = true;
- }
- }
- }
+ u16 ret;
- return progress;
+ ret = pop_from_list(head, &head->small_free_list, sp_bits);
+ if (likely(ret))
+ return ret;
+ ret = get_large_page(head, poolsize, sp_bits);
+ if (likely(ret))
+ ret = break_up_large_page(head, sp_bits, ret);
+ return ret;
}
void *alloc_get(void *pool, unsigned long poolsize,
unsigned long size, unsigned long align)
{
- bool subpage_clean = false, metadata_clean = false;
- unsigned long ret;
-
- if (poolsize < MIN_SIZE)
- return NULL;
-
-again:
- /* Sub-page allocations have an overhead of ~12%. */
- if (size + size/8 >= getpagesize() || align >= getpagesize()) {
- unsigned long pages = div_up(size, getpagesize());
-
- ret = alloc_get_pages(pool, poolsize, pages, align)
- * getpagesize();
- } else
- ret = alloc_sub_page(pool, poolsize, size, align);
-
- if (ret != 0)
- return (char *)pool + ret;
-
- /* Allocation failed: garbage collection. */
- if (!subpage_clean) {
- subpage_clean = true;
- if (clean_empty_subpages(pool, poolsize))
- goto again;
- }
+ struct header *head = pool;
+ unsigned int bucket;
+ unsigned long i;
+ struct bucket_state *bs;
+ struct page_header *ph;
+ unsigned int sp_bits;
- if (!metadata_clean) {
- metadata_clean = true;
- if (clean_metadata(pool, poolsize))
- goto again;
+ if (poolsize < MIN_USEFUL_SIZE) {
+ return tiny_alloc_get(pool, poolsize, size, align);
}
- /* FIXME: Compact metadata? */
- return NULL;
-}
+ size = align_up(size, align);
+ if (unlikely(!size))
+ size = 1;
+ bucket = size_to_bucket(size);
-static void bitmap_free(void *pool, unsigned long pagenum, unsigned long off,
- uint8_t *metadata)
-{
- assert(off % BITMAP_GRANULARITY == 0);
+ sp_bits = small_page_bits(poolsize);
- off /= BITMAP_GRANULARITY;
+ if (bucket >= max_bucket(sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE)) {
+ /* FIXME: huge alloc. */
+ return NULL;
+ }
- /* Offset by one because first bit is used for header. */
- off++;
+ bs = &head->bs[bucket];
- set_bit_pair(metadata, off++, FREE);
- while (off < SUBPAGE_METAOFF / BITMAP_GRANULARITY
- && get_bit_pair(metadata, off) == TAKEN)
- set_bit_pair(metadata, off++, FREE);
-}
+ if (!bs->page_list) {
+ struct page_header *ph;
-static void uniform_free(void *pool, unsigned long pagenum, unsigned long off,
- uint8_t *metadata)
-{
- unsigned int usize, bit;
+ if (large_page_bucket(bucket, sp_bits))
+ bs->page_list = get_large_page(head, poolsize,
+ sp_bits);
+ else
+ bs->page_list = get_small_page(head, poolsize,
+ sp_bits);
+ /* FIXME: Try large-aligned alloc? Header stuffing? */
+ if (unlikely(!bs->page_list))
+ return NULL;
+ ph = from_pgnum(head, bs->page_list, sp_bits);
+ ph->bucket = bucket;
+ ph->elements_used = 0;
+ ph->next = 0;
+ memset(ph->used, 0, used_size(bs->elements_per_page));
+ }
- usize = decode_usize(metadata);
- /* Must have been this size. */
- assert(off % usize == 0);
- bit = off / usize;
+ ph = from_pgnum(head, bs->page_list, sp_bits);
- /* Skip header. */
- metadata += 2;
+ i = find_free_bit(ph->used);
+ set_bit(ph->used, i);
+ ph->elements_used++;
- /* Must have been allocated. */
- assert(metadata[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT)));
- metadata[bit / CHAR_BIT] &= ~(1 << (bit % CHAR_BIT));
-}
-
-static void subpage_free(void *pool, unsigned long pagenum, void *free)
-{
- unsigned long off = (unsigned long)free % getpagesize();
- uint8_t *metadata = get_page_metadata(pool, pagenum);
- enum sub_metadata_type type;
-
- type = get_bit_pair(metadata, 0);
-
- assert(off < SUBPAGE_METAOFF);
-
- switch (type) {
- case BITMAP:
- bitmap_free(pool, pagenum, off, metadata);
- break;
- case UNIFORM:
- uniform_free(pool, pagenum, off, metadata);
- break;
- default:
- assert(0);
+ /* check if this page is now full */
+ if (unlikely(ph->elements_used == bs->elements_per_page)) {
+ del_from_bucket_list(head, bs, ph, sp_bits);
+ add_to_bucket_full_list(head, bs, ph, sp_bits);
}
+
+ return (char *)ph + page_header_size(ph->bucket / INTER_BUCKET_SPACE,
+ bs->elements_per_page)
+ + i * bucket_to_size(bucket);
}
void alloc_free(void *pool, unsigned long poolsize, void *free)
{
- unsigned long pagenum;
- struct metaheader *mh;
-
- if (!free)
- return;
-
- assert(poolsize >= MIN_SIZE);
-
- mh = first_mheader(pool, poolsize);
- assert((char *)free >= (char *)(mh + 1));
- assert((char *)pool + poolsize > (char *)free);
+ struct header *head = pool;
+ struct bucket_state *bs;
+ unsigned int sp_bits;
+ unsigned long i, pgnum, pgoffset, offset = (char *)free - (char *)pool;
+ bool smallpage;
+ struct page_header *ph;
- pagenum = pool_offset(pool, free) / getpagesize();
+ if (poolsize < MIN_USEFUL_SIZE) {
+ return tiny_alloc_free(pool, poolsize, free);
+ }
+
+ /* Get page header. */
+ sp_bits = small_page_bits(poolsize);
+ pgnum = offset >> sp_bits;
+
+ /* Big page? Round down further. */
+ if (test_bit(head->pagesize, pgnum >> BITS_FROM_SMALL_TO_LARGE_PAGE)) {
+ smallpage = false;
+ pgnum &= ~(SMALL_PAGES_PER_LARGE_PAGE - 1);
+ } else
+ smallpage = true;
+
+ /* Step back to page header. */
+ ph = from_pgnum(head, pgnum, sp_bits);
+ bs = &head->bs[ph->bucket];
+ pgoffset = offset - (pgnum << sp_bits)
+ - page_header_size(ph->bucket / INTER_BUCKET_SPACE,
+ bs->elements_per_page);
+
+ if (unlikely(ph->elements_used == bs->elements_per_page)) {
+ del_from_bucket_full_list(head, bs, ph, sp_bits);
+ add_to_bucket_list(head, bs, ph, sp_bits);
+ }
- if (get_page_state(pool, pagenum) == SPECIAL)
- subpage_free(pool, pagenum, free);
- else {
- assert((unsigned long)free % getpagesize() == 0);
- alloc_free_pages(pool, pagenum);
+ /* Which element are we? */
+ i = pgoffset / bucket_to_size(ph->bucket);
+ clear_bit(ph->used, i);
+ ph->elements_used--;
+
+ if (unlikely(ph->elements_used == 0)) {
+ bs = &head->bs[ph->bucket];
+ del_from_bucket_list(head, bs, ph, sp_bits);
+ if (smallpage)
+ add_small_page_to_freelist(head, ph, sp_bits);
+ else
+ add_large_page_to_freelist(head, ph, sp_bits);
}
}
unsigned long alloc_size(void *pool, unsigned long poolsize, void *p)
{
- unsigned long len, pagenum;
- struct metaheader *mh;
-
- assert(poolsize >= MIN_SIZE);
-
- mh = first_mheader(pool, poolsize);
- assert((char *)p >= (char *)(mh + 1));
- assert((char *)pool + poolsize > (char *)p);
-
- pagenum = pool_offset(pool, p) / getpagesize();
-
- if (get_page_state(pool, pagenum) == SPECIAL) {
- unsigned long off = (unsigned long)p % getpagesize();
- uint8_t *metadata = get_page_metadata(pool, pagenum);
- enum sub_metadata_type type = get_bit_pair(metadata, 0);
-
- assert(off < SUBPAGE_METAOFF);
-
- switch (type) {
- case BITMAP:
- assert(off % BITMAP_GRANULARITY == 0);
- off /= BITMAP_GRANULARITY;
-
- /* Offset by one because first bit used for header. */
- off++;
- len = BITMAP_GRANULARITY;
- while (++off < SUBPAGE_METAOFF / BITMAP_GRANULARITY
- && get_bit_pair(metadata, off) == TAKEN)
- len += BITMAP_GRANULARITY;
- break;
- case UNIFORM:
- len = decode_usize(metadata);
- break;
- default:
- assert(0);
- }
- } else {
- len = getpagesize();
- while (get_page_state(pool, ++pagenum) == TAKEN)
- len += getpagesize();
- }
+ struct header *head = pool;
+ unsigned int pgnum, sp_bits;
+ unsigned long offset = (char *)p - (char *)pool;
+ struct page_header *ph;
- return len;
-}
+ if (poolsize < MIN_USEFUL_SIZE)
+ return tiny_alloc_size(pool, poolsize, p);
-static bool is_metadata_page(void *pool, unsigned long poolsize,
- unsigned long page)
-{
- struct metaheader *mh;
+ /* Get page header. */
+ sp_bits = small_page_bits(poolsize);
+ pgnum = offset >> sp_bits;
- for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){
- unsigned long start, end;
+ /* Big page? Round down further. */
+ if (test_bit(head->pagesize, pgnum >> BITS_FROM_SMALL_TO_LARGE_PAGE))
+ pgnum &= ~(SMALL_PAGES_PER_LARGE_PAGE - 1);
- start = pool_offset(pool, mh);
- end = pool_offset(pool, (char *)(mh+1)
- + get_metalen(pool, poolsize, mh));
- if (page >= start/getpagesize() && page < end/getpagesize())
- return true;
- }
- return false;
+ /* Step back to page header. */
+ ph = from_pgnum(head, pgnum, sp_bits);
+ return bucket_to_size(ph->bucket);
}
-static bool check_bitmap_metadata(void *pool, unsigned long *mhoff)
+/* Useful for gdb breakpoints. */
+static bool check_fail(void)
{
- enum alloc_state last_state = FREE;
- unsigned int i;
-
- for (i = 0; i < SUBPAGE_METAOFF / BITMAP_GRANULARITY; i++) {
- enum alloc_state state;
-
- /* +1 because header is the first byte. */
- state = get_bit_pair((uint8_t *)pool + *mhoff, i+1);
- switch (state) {
- case SPECIAL:
- return false;
- case TAKEN:
- if (last_state == FREE)
- return false;
- break;
- default:
- break;
- }
- last_state = state;
- }
- return true;
+ return false;
}
-static bool check_uniform_metadata(void *pool, unsigned long *mhoff)
+static unsigned long count_bits(const unsigned long bitmap[],
+ unsigned long limit)
{
- uint8_t *meta = (uint8_t *)pool + *mhoff;
- unsigned int i, usize;
- struct uniform_cache *uc = pool;
+ unsigned long i, count = 0;
- usize = decode_usize(meta);
- if (usize == 0 || suitable_for_uc(usize, 1) != usize)
- return false;
+ while (limit >= BITS_PER_LONG) {
+ count += popcount(bitmap[0]);
+ bitmap++;
+ limit -= BITS_PER_LONG;
+ }
- /* If it's in uniform cache, make sure that agrees on size. */
- for (i = 0; i < UNIFORM_CACHE_NUM; i++) {
- uint8_t *ucm;
+ for (i = 0; i < limit; i++)
+ if (test_bit(bitmap, i))
+ count++;
+ return count;
+}
- if (!uc->size[i])
- continue;
+static bool out_of_bounds(unsigned long pgnum,
+ unsigned int sp_bits,
+ unsigned long pagesize,
+ unsigned long poolsize)
+{
+ if (((pgnum << sp_bits) >> sp_bits) != pgnum)
+ return true;
- ucm = get_page_metadata(pool, uc->page[i]);
- if (ucm != meta)
- continue;
+ if ((pgnum << sp_bits) > poolsize)
+ return true;
- if (usize != uc->size[i])
- return false;
+ return ((pgnum << sp_bits) + pagesize > poolsize);
+}
+
+static bool check_bucket(struct header *head,
+ unsigned long poolsize,
+ unsigned long pages[],
+ struct bucket_state *bs,
+ unsigned int bindex)
+{
+ bool lp_bucket;
+ struct page_header *ph;
+ unsigned long taken, i, prev, pagesize, sp_bits, lp_bits;
+
+ sp_bits = small_page_bits(poolsize);
+ lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
+
+ lp_bucket = large_page_bucket(bindex, sp_bits);
+
+ pagesize = 1UL << (lp_bucket ? lp_bits : sp_bits);
+
+ /* This many elements fit? */
+ taken = page_header_size(bindex / INTER_BUCKET_SPACE,
+ bs->elements_per_page);
+ taken += bucket_to_size(bindex) * bs->elements_per_page;
+ if (taken > pagesize)
+ return check_fail();
+
+ /* One more wouldn't fit? */
+ taken = page_header_size(bindex / INTER_BUCKET_SPACE,
+ bs->elements_per_page + 1);
+ taken += bucket_to_size(bindex) * (bs->elements_per_page + 1);
+ if (taken <= pagesize)
+ return check_fail();
+
+ /* Walk used list. */
+ prev = 0;
+ for (i = bs->page_list; i; i = ph->next) {
+ /* Bad pointer? */
+ if (out_of_bounds(i, sp_bits, pagesize, poolsize))
+ return check_fail();
+ /* Wrong size page? */
+ if (!!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE)
+ != lp_bucket)
+ return check_fail();
+ /* Large page not on boundary? */
+ if (lp_bucket && (i % SMALL_PAGES_PER_LARGE_PAGE) != 0)
+ return check_fail();
+ ph = from_pgnum(head, i, sp_bits);
+ /* Linked list corrupt? */
+ if (ph->prev != prev)
+ return check_fail();
+ /* Already seen this page? */
+ if (test_bit(pages, i))
+ return check_fail();
+ set_bit(pages, i);
+ /* Empty or full? */
+ if (ph->elements_used == 0)
+ return check_fail();
+ if (ph->elements_used >= bs->elements_per_page)
+ return check_fail();
+ /* Used bits don't agree? */
+ if (ph->elements_used != count_bits(ph->used,
+ bs->elements_per_page))
+ return check_fail();
+ /* Wrong bucket? */
+ if (ph->bucket != bindex)
+ return check_fail();
+ prev = i;
}
- return true;
-}
-static bool check_subpage(void *pool, unsigned long poolsize,
- unsigned long page)
-{
- unsigned long *mhoff = metadata_off(pool, page);
-
- if (*mhoff + sizeof(struct metaheader) > poolsize)
- return false;
-
- if (*mhoff % ALIGNOF(struct metaheader) != 0)
- return false;
-
- /* It must point to a metadata page. */
- if (!is_metadata_page(pool, poolsize, *mhoff / getpagesize()))
- return false;
-
- /* Header at start of subpage allocation */
- switch (get_bit_pair((uint8_t *)pool + *mhoff, 0)) {
- case BITMAP:
- return check_bitmap_metadata(pool, mhoff);
- case UNIFORM:
- return check_uniform_metadata(pool, mhoff);
- default:
- return false;
+ /* Walk full list. */
+ prev = 0;
+ for (i = bs->full_list; i; i = ph->next) {
+ /* Bad pointer? */
+ if (out_of_bounds(i, sp_bits, pagesize, poolsize))
+ return check_fail();
+ /* Wrong size page? */
+ if (!!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE)
+ != lp_bucket)
+ /* Large page not on boundary? */
+ if (lp_bucket && (i % SMALL_PAGES_PER_LARGE_PAGE) != 0)
+ return check_fail();
+ ph = from_pgnum(head, i, sp_bits);
+ /* Linked list corrupt? */
+ if (ph->prev != prev)
+ return check_fail();
+ /* Already seen this page? */
+ if (test_bit(pages, i))
+ return check_fail();
+ set_bit(pages, i);
+ /* Not full? */
+ if (ph->elements_used != bs->elements_per_page)
+ return check_fail();
+ /* Used bits don't agree? */
+ if (ph->elements_used != count_bits(ph->used,
+ bs->elements_per_page))
+ return check_fail();
+ /* Wrong bucket? */
+ if (ph->bucket != bindex)
+ return check_fail();
+ prev = i;
}
-
+ return true;
}
bool alloc_check(void *pool, unsigned long poolsize)
{
- unsigned long i;
- struct metaheader *mh;
- enum alloc_state last_state = FREE;
- bool was_metadata = false;
-
- if (poolsize < MIN_SIZE)
- return true;
-
- if (get_page_state(pool, 0) != TAKEN_START)
- return false;
-
- /* First check metadata pages. */
- /* Metadata pages will be marked TAKEN. */
- for (mh = first_mheader(pool,poolsize); mh; mh = next_mheader(pool,mh)){
- unsigned long start, end;
-
- start = pool_offset(pool, mh);
- if (start + sizeof(*mh) > poolsize)
- return false;
+ struct header *head = pool;
+ unsigned long prev, i, lp_bits, sp_bits, header_size, num_buckets;
+ struct page_header *ph;
+ unsigned long pages[MAX_SMALL_PAGES / BITS_PER_LONG] = { 0 };
+
+ if (poolsize < MIN_USEFUL_SIZE)
+ return tiny_alloc_check(pool, poolsize);
+
+ sp_bits = small_page_bits(poolsize);
+ lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
+
+ num_buckets = max_bucket(lp_bits);
+
+ header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);
+
+ /* First, set all bits taken by header. */
+ for (i = 0; i < header_size; i += (1UL << sp_bits))
+ set_bit(pages, i >> sp_bits);
+
+ /* Check small page free list. */
+ prev = 0;
+ for (i = head->small_free_list; i; i = ph->next) {
+ /* Bad pointer? */
+ if (out_of_bounds(i, sp_bits, 1 << sp_bits, poolsize))
+ return check_fail();
+ /* Large page? */
+ if (test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE))
+ return check_fail();
+ ph = from_pgnum(head, i, sp_bits);
+ /* Linked list corrupt? */
+ if (ph->prev != prev)
+ return check_fail();
+ /* Already seen this page? */
+ if (test_bit(pages, i))
+ return check_fail();
+ set_bit(pages, i);
+ prev = i;
+ }
- end = pool_offset(pool, (char *)(mh+1)
- + get_metalen(pool, poolsize, mh));
- if (end > poolsize)
- return false;
+ /* Check large page free list. */
+ prev = 0;
+ for (i = head->large_free_list; i; i = ph->next) {
+ /* Bad pointer? */
+ if (out_of_bounds(i, sp_bits, 1 << lp_bits, poolsize))
+ return check_fail();
+ /* Not large page? */
+ if (!test_bit(head->pagesize, i >> BITS_FROM_SMALL_TO_LARGE_PAGE))
+ return check_fail();
+ /* Not page boundary? */
+ if ((i % SMALL_PAGES_PER_LARGE_PAGE) != 0)
+ return check_fail();
+ ph = from_pgnum(head, i, sp_bits);
+ /* Linked list corrupt? */
+ if (ph->prev != prev)
+ return check_fail();
+ /* Already seen this page? */
+ if (test_bit(pages, i))
+ return check_fail();
+ set_bit(pages, i);
+ prev = i;
+ }
- /* Non-first pages should start on a page boundary. */
- if (mh != first_mheader(pool, poolsize)
- && start % getpagesize() != 0)
- return false;
+ /* Check the buckets. */
+ for (i = 0; i < max_bucket(lp_bits); i++) {
+ struct bucket_state *bs = &head->bs[i];
- /* It should end on a page boundary. */
- if (end % getpagesize() != 0)
+ if (!check_bucket(head, poolsize, pages, bs, i))
return false;
}
- for (i = 0; i < poolsize / getpagesize(); i++) {
- enum alloc_state state = get_page_state(pool, i);
- bool is_metadata = is_metadata_page(pool, poolsize,i);
-
- switch (state) {
- case FREE:
- /* metadata pages are never free. */
- if (is_metadata)
- return false;
- case TAKEN_START:
- break;
- case TAKEN:
- /* This should continue a previous block. */
- if (last_state == FREE)
- return false;
- if (is_metadata != was_metadata)
- return false;
- break;
- case SPECIAL:
- /* Check metadata pointer etc. */
- if (!check_subpage(pool, poolsize, i))
- return false;
+ /* Make sure every page accounted for. */
+ for (i = 0; i < poolsize >> sp_bits; i++) {
+ if (!test_bit(pages, i))
+ return check_fail();
+ if (test_bit(head->pagesize,
+ i >> BITS_FROM_SMALL_TO_LARGE_PAGE)) {
+ /* Large page, skip rest. */
+ i += SMALL_PAGES_PER_LARGE_PAGE - 1;
}
- last_state = state;
- was_metadata = is_metadata;
}
+
return true;
}
-void alloc_visualize(FILE *out, void *pool, unsigned long poolsize)
+static unsigned long print_overhead(FILE *out, const char *desc,
+ unsigned long bytes,
+ unsigned long poolsize)
{
- struct metaheader *mh;
- struct uniform_cache *uc = pool;
- unsigned long pagebitlen, metadata_pages, count[1<<BITS_PER_PAGE], tot;
- long i;
-
- if (poolsize < MIN_SIZE) {
- fprintf(out, "Pool smaller than %u: no content\n", MIN_SIZE);
- return;
- }
-
- tot = 0;
- for (i = 0; i < UNIFORM_CACHE_NUM; i++)
- tot += (uc->size[i] != 0);
- fprintf(out, "Uniform cache (%lu entries):\n", tot);
- for (i = 0; i < UNIFORM_CACHE_NUM; i++) {
- unsigned int j, total = 0;
- uint8_t *meta;
-
- if (!uc->size[i])
- continue;
-
- /* First two bytes are header. */
- meta = get_page_metadata(pool, uc->page[i]) + 2;
+ fprintf(out, "Overhead (%s): %lu bytes (%.3g%%)\n",
+ desc, bytes, 100.0 * bytes / poolsize);
+ return bytes;
+}
- for (j = 0; j < SUBPAGE_METAOFF / uc->size[i]; j++)
- if (meta[j / 8] & (1 << (j % 8)))
- total++;
+static unsigned long count_list(struct header *head,
+ u16 pgnum,
+ unsigned int sp_bits,
+ unsigned long *total_elems)
+{
+ struct page_header *p;
+ unsigned long ret = 0;
- printf(" %u: %u/%u (%u%% density)\n",
- uc->size[j], total, SUBPAGE_METAOFF / uc->size[i],
- (total * 100) / (SUBPAGE_METAOFF / uc->size[i]));
+ while (pgnum) {
+ p = from_pgnum(head, pgnum, sp_bits);
+ if (total_elems)
+ (*total_elems) += p->elements_used;
+ ret++;
+ pgnum = p->next;
}
+ return ret;
+}
- memset(count, 0, sizeof(count));
- for (i = 0; i < poolsize / getpagesize(); i++)
- count[get_page_state(pool, i)]++;
-
- mh = first_mheader(pool, poolsize);
- pagebitlen = (uint8_t *)mh - get_page_statebits(pool);
- fprintf(out, "%lu bytes of page bits: FREE/TAKEN/TAKEN_START/SUBPAGE = %lu/%lu/%lu/%lu\n",
- pagebitlen, count[0], count[1], count[2], count[3]);
-
- /* One metadata page for every page of page bits. */
- metadata_pages = div_up(pagebitlen, getpagesize());
-
- /* Now do each metadata page. */
- for (; mh; mh = next_mheader(pool,mh)) {
- unsigned long free = 0, bitmapblocks = 0, uniformblocks = 0,
- len = 0, uniformlen = 0, bitmaplen = 0, metalen;
- uint8_t *meta = (uint8_t *)(mh + 1);
-
- metalen = get_metalen(pool, poolsize, mh);
- metadata_pages += (sizeof(*mh) + metalen) / getpagesize();
-
- for (i = 0; i < metalen * METADATA_PER_BYTE; i += len) {
- switch (get_bit_pair(meta, i)) {
- case FREE:
- len = 1;
- free++;
- break;
- case BITMAP:
- /* Skip over this allocated part. */
- len = BITMAP_METALEN * CHAR_BIT;
- bitmapblocks++;
- bitmaplen += len;
- break;
- case UNIFORM:
- /* Skip over this part. */
- len = decode_usize(meta + i/METADATA_PER_BYTE);
- len = uniform_metalen(len) * METADATA_PER_BYTE;
- uniformblocks++;
- uniformlen += len;
- break;
- default:
- assert(0);
- }
- }
+static unsigned long visualize_bucket(FILE *out, struct header *head,
+ unsigned int bucket,
+ unsigned long poolsize,
+ unsigned int sp_bits)
+{
+ unsigned long num_full, num_partial, num_pages, page_size,
+ elems, hdr_min, hdr_size, elems_per_page, overhead = 0;
- fprintf(out, "Metadata %lu-%lu: %lu free, %lu bitmapblocks, %lu uniformblocks, %lu%% density\n",
- pool_offset(pool, mh),
- pool_offset(pool, (char *)(mh+1) + metalen),
- free, bitmapblocks, uniformblocks,
- (bitmaplen + uniformlen) * 100
- / (free + bitmaplen + uniformlen));
- }
+ elems_per_page = head->bs[bucket].elements_per_page;
+
+ /* If we used byte-based bitmaps, we could get pg hdr to: */
+ hdr_min = sizeof(struct page_header)
+ - sizeof(((struct page_header *)0)->used)
+ + align_up(elems_per_page, CHAR_BIT) / CHAR_BIT;
+ hdr_size = page_header_size(bucket / INTER_BUCKET_SPACE,
+ elems_per_page);
- /* Account for total pages allocated. */
- tot = (count[1] + count[2] - metadata_pages) * getpagesize();
+ elems = 0;
+ num_full = count_list(head, head->bs[bucket].full_list, sp_bits,
+ &elems);
+ num_partial = count_list(head, head->bs[bucket].page_list, sp_bits,
+ &elems);
+ num_pages = num_full + num_partial;
+ if (!num_pages)
+ return 0;
- fprintf(out, "Total metadata bytes = %lu\n",
- metadata_pages * getpagesize());
+ fprintf(out, "Bucket %u (%lu bytes):"
+ " %lu full, %lu partial = %lu elements\n",
+ bucket, bucket_to_size(bucket), num_full, num_partial, elems);
+ /* Strict requirement of page header size. */
+ overhead += print_overhead(out, "page headers",
+ hdr_min * num_pages, poolsize);
+ /* Gap between minimal page header and actual start. */
+ overhead += print_overhead(out, "page post-header alignments",
+ (hdr_size - hdr_min) * num_pages, poolsize);
+ /* Between last element and end of page. */
+ page_size = (1 << sp_bits);
+ if (large_page_bucket(bucket, sp_bits))
+ page_size <<= BITS_FROM_SMALL_TO_LARGE_PAGE;
+
+ overhead += print_overhead(out, "page tails",
+ (page_size - (hdr_size
+ + (elems_per_page
+ * bucket_to_size(bucket))))
+ * num_pages, poolsize);
+ return overhead;
+}
- /* Now do every subpage. */
- for (i = 0; i < poolsize / getpagesize(); i++) {
- uint8_t *meta;
- unsigned int j, allocated;
- enum sub_metadata_type type;
+void alloc_visualize(FILE *out, void *pool, unsigned long poolsize)
+{
+ struct header *head = pool;
+ unsigned long i, lp_bits, sp_bits, header_size, num_buckets, count,
+ overhead = 0;
- if (get_page_state(pool, i) != SPECIAL)
- continue;
+ fprintf(out, "Pool %p size %lu: (%s allocator)\n", pool, poolsize,
+ poolsize < MIN_USEFUL_SIZE ? "tiny" : "standard");
- memset(count, 0, sizeof(count));
-
- meta = get_page_metadata(pool, i);
- type = get_bit_pair(meta, 0);
-
- if (type == BITMAP) {
- for (j = 0; j < SUBPAGE_METAOFF/BITMAP_GRANULARITY; j++)
- count[get_page_state(meta, j)]++;
- allocated = (count[1] + count[2]) * BITMAP_GRANULARITY;
- fprintf(out, "Subpage bitmap ");
- } else {
- unsigned int usize = decode_usize(meta);
-
- assert(type == UNIFORM);
- fprintf(out, "Subpage uniform (%u) ", usize);
- meta += 2;
- for (j = 0; j < SUBPAGE_METAOFF / usize; j++)
- count[!!(meta[j / 8] & (1 << (j % 8)))]++;
- allocated = count[1] * usize;
- }
- fprintf(out, "%lu: FREE/TAKEN/TAKEN_START = %lu/%lu/%lu %u%% density\n",
- i, count[0], count[1], count[2],
- allocated * 100 / getpagesize());
- tot += allocated;
+ if (poolsize < MIN_USEFUL_SIZE) {
+ tiny_alloc_visualize(out, pool, poolsize);
+ return;
}
-
- /* This is optimistic, since we overalloc in several cases. */
- fprintf(out, "Best possible allocation density = %lu%%\n",
- tot * 100 / poolsize);
+
+ sp_bits = small_page_bits(poolsize);
+ lp_bits = sp_bits + BITS_FROM_SMALL_TO_LARGE_PAGE;
+
+ num_buckets = max_bucket(lp_bits);
+ header_size = sizeof(*head) + sizeof(head->bs) * (num_buckets-1);
+
+ fprintf(out, "Large page size %lu, small page size %lu.\n",
+ 1UL << lp_bits, 1UL << sp_bits);
+ overhead += print_overhead(out, "unused pool tail",
+ poolsize % (1 << lp_bits), poolsize);
+ fprintf(out, "Main header %lu bytes (%lu small pages).\n",
+ header_size, align_up(header_size, 1 << sp_bits) >> sp_bits);
+ overhead += print_overhead(out, "partial header page",
+ align_up(header_size, 1 << sp_bits)
+ - header_size, poolsize);
+ /* Total large pages. */
+ i = count_bits(head->pagesize, poolsize >> lp_bits);
+ /* Used pages. */
+ count = i - count_list(head, head->large_free_list, sp_bits, NULL);
+ fprintf(out, "%lu/%lu large pages used (%.3g%%)\n",
+ count, i, count ? 100.0 * count / i : 0.0);
+
+ /* Total small pages. */
+ i = (poolsize >> lp_bits) - i;
+ /* Used pages */
+ count = i - count_list(head, head->small_free_list, sp_bits, NULL);
+ fprintf(out, "%lu/%lu small pages used (%.3g%%)\n",
+ count, i, count ? 100.0 * count / i : 0.0);
+
+ /* Summary of each bucket. */
+ fprintf(out, "%lu buckets:\n", num_buckets);
+ for (i = 0; i < num_buckets; i++)
+ overhead += visualize_bucket(out, head, i, poolsize, sp_bits);
+
+ print_overhead(out, "total", overhead, poolsize);
}