/*- * Copyright (c) 1998 Matthew Dillon. All Rights Reserved. * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting * * This module implements a general bitmap allocator/deallocator. The * allocator eats around 2 bits per 'block'. The module does not * try to interpret the meaning of a 'block' other then to return * SWAPBLK_NONE on an allocation failure. * * A radix tree is used to maintain the bitmap. Two radix constants are * involved: One for the bitmaps contained in the leaf nodes (typically * 32), and one for the meta nodes (typically 16). Both meta and leaf * nodes have a hint field. This field gives us a hint as to the largest * free contiguous range of blocks under the node. It may contain a * value that is too high, but will never contain a value that is too * low. When the radix tree is searched, allocation failures in subtrees * update the hint. * * The radix tree also implements two collapsed states for meta nodes: * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is * in either of these two states, all information contained underneath * the node is considered stale. These states are used to optimize * allocation and freeing operations. * * The hinting greatly increases code efficiency for allocations while * the general radix structure optimizes both allocations and frees. The * radix tree should be able to operate well no matter how much * fragmentation there is and no matter how large a bitmap is used. * * Unlike the rlist code, the blist code wires all necessary memory at * creation time. Neither allocations nor frees require interaction with * the memory subsystem. In contrast, the rlist code may allocate memory * on an rlist_free() call. The non-blocking features of the blist code * are used to great advantage in the swap code (vm/nswap_pager.c). The * rlist code uses a little less overall memory then the blist code (but * due to swap interleaving not all that much less), but the blist code * scales much, much better. * * LAYOUT: The radix tree is layed out recursively using a * linear array. Each meta node is immediately followed (layed out * sequentially in memory) by BLIST_META_RADIX lower level nodes. This * is a recursive structure but one that can be easily scanned through * a very simple 'skip' calculation. In order to support large radixes, * portions of the tree may reside outside our memory allocation. We * handle this with an early-termination optimization (when bighint is * set to -1) on the scan. The memory allocation is only large enough * to cover the number of blocks requested at creation time even if it * must be encompassed in larger root-node radix. * * NOTE: the allocator cannot currently allocate more then * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too * large' if you try. This is an area that could use improvement. The * radix is large enough that this restriction does not effect the swap * system, though. Currently only the allocation code is effected by * this algorithmic unfeature. The freeing code can handle arbitrary * ranges. * * This code can be compiled stand-alone for debugging. */ /* * NOTE: a few modifications is made in the orginal FreeBSD code: * 1. change the name of some variables/constants * 2. discard the code handling ALL_FREE and ALL_ALLOCATED state * 3. allocate more than 32 slots won't lead to kernel panic * 4. remove all the code for debugging * * Zhao Shuai * upczhsh@163.com */ #include #include #define TERMINATOR -1 static uint32 radix_bitmap_init(radix_node *node, uint32 radix, uint32 skip, uint32 slots) { uint32 index = 0; int32 big_hint = radix < slots ? radix : slots; // leaf node if (radix == BITMAP_RADIX) { if (node) { node->big_hint = big_hint; if (big_hint == BITMAP_RADIX) node->u.bitmap = 0; else node->u.bitmap = (bitmap_t)-1 << big_hint; } return index; } // not leaf node if (node) { node->big_hint = big_hint; node->u.available = big_hint; } radix /= NODE_RADIX; uint32 next_skip = skip / NODE_RADIX; uint32 i; for (i = 1; i <= skip; i += next_skip) { if (slots >= radix) { index = i + radix_bitmap_init(node ? &node[i] : NULL, radix, next_skip - 1, radix); slots -= radix; } else if (slots > 0) { index = i + radix_bitmap_init(node ? &node[i] : NULL, radix, next_skip - 1, slots); slots = 0; } else { // add a terminator if (node) node[i].big_hint = TERMINATOR; break; } } if (index < i) index = i; return index; } radix_bitmap * radix_bitmap_create(uint32 slots) { uint32 radix = BITMAP_RADIX; uint32 skip = 0; while (radix < slots) { radix *= NODE_RADIX; skip = (skip + 1) * NODE_RADIX; } radix_bitmap *bmp = (radix_bitmap *)malloc(sizeof(radix_bitmap)); if (bmp == NULL) return NULL; bmp->radix = radix; bmp->skip = skip; bmp->free_slots = slots; bmp->root_size = 1 + radix_bitmap_init(NULL, radix, skip, slots); bmp->root = (radix_node *)malloc(bmp->root_size * sizeof(radix_node)); if (bmp->root == NULL) { free(bmp); return NULL; } radix_bitmap_init(bmp->root, radix, skip, slots); return bmp; } void radix_bitmap_destroy(radix_bitmap *bmp) { free(bmp->root); free(bmp); } static radix_slot_t radix_leaf_alloc(radix_node *leaf, radix_slot_t slotIndex, int32 count) { if (count <= (int32)BITMAP_RADIX) { bitmap_t bitmap = ~leaf->u.bitmap; uint32 n = BITMAP_RADIX - count; bitmap_t mask = (bitmap_t)-1 >> n; for (uint32 j = 0; j <= n; j++) { if ((bitmap & mask) == mask) { leaf->u.bitmap |= mask; return (slotIndex + j); } mask <<= 1; } } // we could not allocate count here, update big_hint if (leaf->big_hint >= count) leaf->big_hint = count - 1; return RADIX_SLOT_NONE; } static radix_slot_t radix_node_alloc(radix_node *node, radix_slot_t slotIndex, int32 count, uint32 radix, uint32 skip) { uint32 next_skip = skip / NODE_RADIX; radix /= NODE_RADIX; for (uint32 i = 1; i <= skip; i += next_skip) { if (node[i].big_hint == TERMINATOR) // TERMINATOR break; if (count <= node[i].big_hint) { radix_slot_t addr = RADIX_SLOT_NONE; if (next_skip == 1) addr = radix_leaf_alloc(&node[i], slotIndex, count); else addr = radix_node_alloc(&node[i], slotIndex, count, radix, next_skip - 1); if (addr != RADIX_SLOT_NONE) { node->u.available -= count; if (node->big_hint > node->u.available) node->big_hint = node->u.available; return addr; } } slotIndex += radix; } // we could not allocate count in the subtree, update big_hint if (node->big_hint >= count) node->big_hint = count - 1; return RADIX_SLOT_NONE; } radix_slot_t radix_bitmap_alloc(radix_bitmap *bmp, uint32 count) { radix_slot_t addr = RADIX_SLOT_NONE; if (bmp->radix == BITMAP_RADIX) addr = radix_leaf_alloc(bmp->root, 0, count); else addr = radix_node_alloc(bmp->root, 0, count, bmp->radix, bmp->skip); if (addr != RADIX_SLOT_NONE) bmp->free_slots -= count; return addr; } static void radix_leaf_dealloc(radix_node *leaf, radix_slot_t slotIndex, uint32 count) { uint32 n = slotIndex & (BITMAP_RADIX - 1); bitmap_t mask = ((bitmap_t)-1 >> (BITMAP_RADIX - count - n)) & ((bitmap_t)-1 << n); leaf->u.bitmap &= ~mask; leaf->big_hint = BITMAP_RADIX; } static void radix_node_dealloc(radix_node *node, radix_slot_t slotIndex, uint32 count, uint32 radix, uint32 skip, radix_slot_t index) { node->u.available += count; uint32 next_skip = skip / NODE_RADIX; radix /= NODE_RADIX; uint32 i = (slotIndex - index) / radix; index += i * radix; i = i * next_skip + 1; while (i <= skip && index < slotIndex + count) { uint32 v = index + radix - slotIndex; if (v > count) v = count; if (next_skip == 1) radix_leaf_dealloc(&node[i], slotIndex, v); else radix_node_dealloc(&node[i], slotIndex, v, radix, next_skip - 1, index); if (node->big_hint < node[i].big_hint) node->big_hint = node[i].big_hint; count -= v; slotIndex += v; index += radix; i += next_skip; } } void radix_bitmap_dealloc(radix_bitmap *bmp, radix_slot_t slotIndex, uint32 count) { if (bmp->radix == BITMAP_RADIX) radix_leaf_dealloc(bmp->root, slotIndex, count); else radix_node_dealloc(bmp->root, slotIndex, count, bmp->radix, bmp->skip, 0); bmp->free_slots += count; }