1dcadb2baSIngo Weinhold /*-
2dcadb2baSIngo Weinhold * Copyright (c) 1998 Matthew Dillon. All Rights Reserved.
3dcadb2baSIngo Weinhold * Redistribution and use in source and binary forms, with or without
4dcadb2baSIngo Weinhold * modification, are permitted provided that the following conditions
5dcadb2baSIngo Weinhold * are met:
6dcadb2baSIngo Weinhold * 1. Redistributions of source code must retain the above copyright
7dcadb2baSIngo Weinhold * notice, this list of conditions and the following disclaimer.
8dcadb2baSIngo Weinhold * 2. Redistributions in binary form must reproduce the above copyright
9dcadb2baSIngo Weinhold * notice, this list of conditions and the following disclaimer in the
10dcadb2baSIngo Weinhold * documentation and/or other materials provided with the distribution.
11dcadb2baSIngo Weinhold * 4. Neither the name of the University nor the names of its contributors
12dcadb2baSIngo Weinhold * may be used to endorse or promote products derived from this software
13dcadb2baSIngo Weinhold * without specific prior written permission.
14dcadb2baSIngo Weinhold *
15dcadb2baSIngo Weinhold * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16dcadb2baSIngo Weinhold * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17dcadb2baSIngo Weinhold * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18dcadb2baSIngo Weinhold * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
19dcadb2baSIngo Weinhold * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20dcadb2baSIngo Weinhold * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
21dcadb2baSIngo Weinhold * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22dcadb2baSIngo Weinhold * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23dcadb2baSIngo Weinhold * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24dcadb2baSIngo Weinhold * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25dcadb2baSIngo Weinhold * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26dcadb2baSIngo Weinhold */
27dcadb2baSIngo Weinhold
28dcadb2baSIngo Weinhold /*
29dcadb2baSIngo Weinhold * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting
30dcadb2baSIngo Weinhold *
31dcadb2baSIngo Weinhold * This module implements a general bitmap allocator/deallocator. The
32dcadb2baSIngo Weinhold * allocator eats around 2 bits per 'block'. The module does not
33dcadb2baSIngo Weinhold * try to interpret the meaning of a 'block' other then to return
34dcadb2baSIngo Weinhold * SWAPBLK_NONE on an allocation failure.
35dcadb2baSIngo Weinhold *
36dcadb2baSIngo Weinhold * A radix tree is used to maintain the bitmap. Two radix constants are
37dcadb2baSIngo Weinhold * involved: One for the bitmaps contained in the leaf nodes (typically
38dcadb2baSIngo Weinhold * 32), and one for the meta nodes (typically 16). Both meta and leaf
39dcadb2baSIngo Weinhold * nodes have a hint field. This field gives us a hint as to the largest
40dcadb2baSIngo Weinhold * free contiguous range of blocks under the node. It may contain a
41dcadb2baSIngo Weinhold * value that is too high, but will never contain a value that is too
42dcadb2baSIngo Weinhold * low. When the radix tree is searched, allocation failures in subtrees
43dcadb2baSIngo Weinhold * update the hint.
44dcadb2baSIngo Weinhold *
45dcadb2baSIngo Weinhold * The radix tree also implements two collapsed states for meta nodes:
46dcadb2baSIngo Weinhold * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is
47dcadb2baSIngo Weinhold * in either of these two states, all information contained underneath
48dcadb2baSIngo Weinhold * the node is considered stale. These states are used to optimize
49dcadb2baSIngo Weinhold * allocation and freeing operations.
50dcadb2baSIngo Weinhold *
51dcadb2baSIngo Weinhold * The hinting greatly increases code efficiency for allocations while
52dcadb2baSIngo Weinhold * the general radix structure optimizes both allocations and frees. The
53dcadb2baSIngo Weinhold * radix tree should be able to operate well no matter how much
54dcadb2baSIngo Weinhold * fragmentation there is and no matter how large a bitmap is used.
55dcadb2baSIngo Weinhold *
56dcadb2baSIngo Weinhold * Unlike the rlist code, the blist code wires all necessary memory at
57dcadb2baSIngo Weinhold * creation time. Neither allocations nor frees require interaction with
58dcadb2baSIngo Weinhold * the memory subsystem. In contrast, the rlist code may allocate memory
59dcadb2baSIngo Weinhold * on an rlist_free() call. The non-blocking features of the blist code
60dcadb2baSIngo Weinhold * are used to great advantage in the swap code (vm/nswap_pager.c). The
61dcadb2baSIngo Weinhold * rlist code uses a little less overall memory then the blist code (but
62dcadb2baSIngo Weinhold * due to swap interleaving not all that much less), but the blist code
63dcadb2baSIngo Weinhold * scales much, much better.
64dcadb2baSIngo Weinhold *
65dcadb2baSIngo Weinhold * LAYOUT: The radix tree is layed out recursively using a
66dcadb2baSIngo Weinhold * linear array. Each meta node is immediately followed (layed out
67dcadb2baSIngo Weinhold * sequentially in memory) by BLIST_META_RADIX lower level nodes. This
68dcadb2baSIngo Weinhold * is a recursive structure but one that can be easily scanned through
69dcadb2baSIngo Weinhold * a very simple 'skip' calculation. In order to support large radixes,
70dcadb2baSIngo Weinhold * portions of the tree may reside outside our memory allocation. We
71dcadb2baSIngo Weinhold * handle this with an early-termination optimization (when bighint is
72dcadb2baSIngo Weinhold * set to -1) on the scan. The memory allocation is only large enough
73dcadb2baSIngo Weinhold * to cover the number of blocks requested at creation time even if it
74dcadb2baSIngo Weinhold * must be encompassed in larger root-node radix.
75dcadb2baSIngo Weinhold *
76dcadb2baSIngo Weinhold * NOTE: the allocator cannot currently allocate more then
77dcadb2baSIngo Weinhold * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too
78dcadb2baSIngo Weinhold * large' if you try. This is an area that could use improvement. The
79dcadb2baSIngo Weinhold * radix is large enough that this restriction does not effect the swap
80dcadb2baSIngo Weinhold * system, though. Currently only the allocation code is effected by
81dcadb2baSIngo Weinhold * this algorithmic unfeature. The freeing code can handle arbitrary
82dcadb2baSIngo Weinhold * ranges.
83dcadb2baSIngo Weinhold *
84dcadb2baSIngo Weinhold * This code can be compiled stand-alone for debugging.
85dcadb2baSIngo Weinhold */
86dcadb2baSIngo Weinhold
87dcadb2baSIngo Weinhold /*
88dcadb2baSIngo Weinhold * NOTE: a few modifications is made in the orginal FreeBSD code:
89dcadb2baSIngo Weinhold * 1. change the name of some variables/constants
90dcadb2baSIngo Weinhold * 2. discard the code handling ALL_FREE and ALL_ALLOCATED state
91dcadb2baSIngo Weinhold * 3. allocate more than 32 slots won't lead to kernel panic
92dcadb2baSIngo Weinhold * 4. remove all the code for debugging
93dcadb2baSIngo Weinhold *
94dcadb2baSIngo Weinhold * Zhao Shuai
95dcadb2baSIngo Weinhold * upczhsh@163.com
96dcadb2baSIngo Weinhold */
97dcadb2baSIngo Weinhold
98dcadb2baSIngo Weinhold
99dcadb2baSIngo Weinhold #include <stdlib.h>
100dcadb2baSIngo Weinhold #include <util/RadixBitmap.h>
101dcadb2baSIngo Weinhold
102dcadb2baSIngo Weinhold #define TERMINATOR -1
103dcadb2baSIngo Weinhold
104dcadb2baSIngo Weinhold
105dcadb2baSIngo Weinhold static uint32
radix_bitmap_init(radix_node * node,uint32 radix,uint32 skip,uint32 slots)106dcadb2baSIngo Weinhold radix_bitmap_init(radix_node *node, uint32 radix, uint32 skip, uint32 slots)
107dcadb2baSIngo Weinhold {
108dcadb2baSIngo Weinhold uint32 index = 0;
109dcadb2baSIngo Weinhold int32 big_hint = radix < slots ? radix : slots;
110dcadb2baSIngo Weinhold
111dcadb2baSIngo Weinhold // leaf node
112dcadb2baSIngo Weinhold if (radix == BITMAP_RADIX) {
113dcadb2baSIngo Weinhold if (node) {
114dcadb2baSIngo Weinhold node->big_hint = big_hint;
115dcadb2baSIngo Weinhold if (big_hint == BITMAP_RADIX)
116dcadb2baSIngo Weinhold node->u.bitmap = 0;
117dcadb2baSIngo Weinhold else
118dcadb2baSIngo Weinhold node->u.bitmap = (bitmap_t)-1 << big_hint;
119dcadb2baSIngo Weinhold }
120dcadb2baSIngo Weinhold return index;
121dcadb2baSIngo Weinhold }
122dcadb2baSIngo Weinhold
123dcadb2baSIngo Weinhold // not leaf node
124dcadb2baSIngo Weinhold if (node) {
125dcadb2baSIngo Weinhold node->big_hint = big_hint;
126dcadb2baSIngo Weinhold node->u.available = big_hint;
127dcadb2baSIngo Weinhold }
128dcadb2baSIngo Weinhold
129dcadb2baSIngo Weinhold radix /= NODE_RADIX;
130dcadb2baSIngo Weinhold uint32 next_skip = skip / NODE_RADIX;
131dcadb2baSIngo Weinhold
132dcadb2baSIngo Weinhold uint32 i;
133dcadb2baSIngo Weinhold for (i = 1; i <= skip; i += next_skip) {
134dcadb2baSIngo Weinhold if (slots >= radix) {
135dcadb2baSIngo Weinhold index = i + radix_bitmap_init(node ? &node[i] : NULL,
136dcadb2baSIngo Weinhold radix, next_skip - 1, radix);
137dcadb2baSIngo Weinhold slots -= radix;
138dcadb2baSIngo Weinhold } else if (slots > 0) {
139dcadb2baSIngo Weinhold index = i + radix_bitmap_init(node ? &node[i] : NULL,
140dcadb2baSIngo Weinhold radix, next_skip - 1, slots);
141dcadb2baSIngo Weinhold slots = 0;
142dcadb2baSIngo Weinhold } else { // add a terminator
143dcadb2baSIngo Weinhold if (node)
144dcadb2baSIngo Weinhold node[i].big_hint = TERMINATOR;
145dcadb2baSIngo Weinhold break;
146dcadb2baSIngo Weinhold }
147dcadb2baSIngo Weinhold }
148dcadb2baSIngo Weinhold
149dcadb2baSIngo Weinhold if (index < i)
150dcadb2baSIngo Weinhold index = i;
151dcadb2baSIngo Weinhold
152dcadb2baSIngo Weinhold return index;
153dcadb2baSIngo Weinhold }
154dcadb2baSIngo Weinhold
155dcadb2baSIngo Weinhold
156dcadb2baSIngo Weinhold radix_bitmap *
radix_bitmap_create(uint32 slots)157dcadb2baSIngo Weinhold radix_bitmap_create(uint32 slots)
158dcadb2baSIngo Weinhold {
159dcadb2baSIngo Weinhold uint32 radix = BITMAP_RADIX;
160dcadb2baSIngo Weinhold uint32 skip = 0;
161dcadb2baSIngo Weinhold
162dcadb2baSIngo Weinhold while (radix < slots) {
163dcadb2baSIngo Weinhold radix *= NODE_RADIX;
164dcadb2baSIngo Weinhold skip = (skip + 1) * NODE_RADIX;
165dcadb2baSIngo Weinhold }
166dcadb2baSIngo Weinhold
167dcadb2baSIngo Weinhold radix_bitmap *bmp = (radix_bitmap *)malloc(sizeof(radix_bitmap));
168dcadb2baSIngo Weinhold if (bmp == NULL)
169dcadb2baSIngo Weinhold return NULL;
170dcadb2baSIngo Weinhold
171dcadb2baSIngo Weinhold bmp->radix = radix;
172dcadb2baSIngo Weinhold bmp->skip = skip;
173dcadb2baSIngo Weinhold bmp->free_slots = slots;
174dcadb2baSIngo Weinhold bmp->root_size = 1 + radix_bitmap_init(NULL, radix, skip, slots);
175dcadb2baSIngo Weinhold
176dcadb2baSIngo Weinhold bmp->root = (radix_node *)malloc(bmp->root_size * sizeof(radix_node));
177dcadb2baSIngo Weinhold if (bmp->root == NULL) {
178dcadb2baSIngo Weinhold free(bmp);
179dcadb2baSIngo Weinhold return NULL;
180dcadb2baSIngo Weinhold }
181dcadb2baSIngo Weinhold
182dcadb2baSIngo Weinhold radix_bitmap_init(bmp->root, radix, skip, slots);
183dcadb2baSIngo Weinhold
184dcadb2baSIngo Weinhold return bmp;
185dcadb2baSIngo Weinhold }
186dcadb2baSIngo Weinhold
187dcadb2baSIngo Weinhold
188dcadb2baSIngo Weinhold void
radix_bitmap_destroy(radix_bitmap * bmp)189dcadb2baSIngo Weinhold radix_bitmap_destroy(radix_bitmap *bmp)
190dcadb2baSIngo Weinhold {
191dcadb2baSIngo Weinhold free(bmp->root);
192dcadb2baSIngo Weinhold free(bmp);
193dcadb2baSIngo Weinhold }
194dcadb2baSIngo Weinhold
195dcadb2baSIngo Weinhold
196*8c9b84a5SIngo Weinhold static radix_slot_t
radix_leaf_alloc(radix_node * leaf,radix_slot_t slotIndex,int32 count)197*8c9b84a5SIngo Weinhold radix_leaf_alloc(radix_node *leaf, radix_slot_t slotIndex, int32 count)
198dcadb2baSIngo Weinhold {
199b50e6202SIngo Weinhold if (count <= (int32)BITMAP_RADIX) {
200dcadb2baSIngo Weinhold bitmap_t bitmap = ~leaf->u.bitmap;
201dcadb2baSIngo Weinhold uint32 n = BITMAP_RADIX - count;
202dcadb2baSIngo Weinhold bitmap_t mask = (bitmap_t)-1 >> n;
203dcadb2baSIngo Weinhold for (uint32 j = 0; j <= n; j++) {
204dcadb2baSIngo Weinhold if ((bitmap & mask) == mask) {
205dcadb2baSIngo Weinhold leaf->u.bitmap |= mask;
206dcadb2baSIngo Weinhold return (slotIndex + j);
207dcadb2baSIngo Weinhold }
208dcadb2baSIngo Weinhold mask <<= 1;
209dcadb2baSIngo Weinhold }
210dcadb2baSIngo Weinhold }
211dcadb2baSIngo Weinhold
212dcadb2baSIngo Weinhold // we could not allocate count here, update big_hint
213dcadb2baSIngo Weinhold if (leaf->big_hint >= count)
214dcadb2baSIngo Weinhold leaf->big_hint = count - 1;
215*8c9b84a5SIngo Weinhold return RADIX_SLOT_NONE;
216dcadb2baSIngo Weinhold }
217dcadb2baSIngo Weinhold
218dcadb2baSIngo Weinhold
219*8c9b84a5SIngo Weinhold static radix_slot_t
radix_node_alloc(radix_node * node,radix_slot_t slotIndex,int32 count,uint32 radix,uint32 skip)220*8c9b84a5SIngo Weinhold radix_node_alloc(radix_node *node, radix_slot_t slotIndex, int32 count,
221dcadb2baSIngo Weinhold uint32 radix, uint32 skip)
222dcadb2baSIngo Weinhold {
223dcadb2baSIngo Weinhold uint32 next_skip = skip / NODE_RADIX;
224dcadb2baSIngo Weinhold radix /= NODE_RADIX;
225dcadb2baSIngo Weinhold
226dcadb2baSIngo Weinhold for (uint32 i = 1; i <= skip; i += next_skip) {
227dcadb2baSIngo Weinhold if (node[i].big_hint == TERMINATOR) // TERMINATOR
228dcadb2baSIngo Weinhold break;
229dcadb2baSIngo Weinhold
230dcadb2baSIngo Weinhold if (count <= node[i].big_hint) {
231*8c9b84a5SIngo Weinhold radix_slot_t addr = RADIX_SLOT_NONE;
232dcadb2baSIngo Weinhold if (next_skip == 1)
233dcadb2baSIngo Weinhold addr = radix_leaf_alloc(&node[i], slotIndex, count);
234dcadb2baSIngo Weinhold else
235dcadb2baSIngo Weinhold addr = radix_node_alloc(&node[i], slotIndex, count, radix,
236dcadb2baSIngo Weinhold next_skip - 1);
237*8c9b84a5SIngo Weinhold if (addr != RADIX_SLOT_NONE) {
238dcadb2baSIngo Weinhold node->u.available -= count;
239dcadb2baSIngo Weinhold if (node->big_hint > node->u.available)
240dcadb2baSIngo Weinhold node->big_hint = node->u.available;
241dcadb2baSIngo Weinhold
242dcadb2baSIngo Weinhold return addr;
243dcadb2baSIngo Weinhold }
244dcadb2baSIngo Weinhold }
245dcadb2baSIngo Weinhold slotIndex += radix;
246dcadb2baSIngo Weinhold }
247dcadb2baSIngo Weinhold
248dcadb2baSIngo Weinhold // we could not allocate count in the subtree, update big_hint
249dcadb2baSIngo Weinhold if (node->big_hint >= count)
250dcadb2baSIngo Weinhold node->big_hint = count - 1;
251*8c9b84a5SIngo Weinhold return RADIX_SLOT_NONE;
252dcadb2baSIngo Weinhold }
253dcadb2baSIngo Weinhold
254dcadb2baSIngo Weinhold
255*8c9b84a5SIngo Weinhold radix_slot_t
radix_bitmap_alloc(radix_bitmap * bmp,uint32 count)256dcadb2baSIngo Weinhold radix_bitmap_alloc(radix_bitmap *bmp, uint32 count)
257dcadb2baSIngo Weinhold {
258*8c9b84a5SIngo Weinhold radix_slot_t addr = RADIX_SLOT_NONE;
259dcadb2baSIngo Weinhold
260dcadb2baSIngo Weinhold if (bmp->radix == BITMAP_RADIX)
261dcadb2baSIngo Weinhold addr = radix_leaf_alloc(bmp->root, 0, count);
262dcadb2baSIngo Weinhold else
263dcadb2baSIngo Weinhold addr = radix_node_alloc(bmp->root, 0, count, bmp->radix, bmp->skip);
264dcadb2baSIngo Weinhold
265*8c9b84a5SIngo Weinhold if (addr != RADIX_SLOT_NONE)
266dcadb2baSIngo Weinhold bmp->free_slots -= count;
267dcadb2baSIngo Weinhold
268dcadb2baSIngo Weinhold return addr;
269dcadb2baSIngo Weinhold }
270dcadb2baSIngo Weinhold
271dcadb2baSIngo Weinhold
272dcadb2baSIngo Weinhold static void
radix_leaf_dealloc(radix_node * leaf,radix_slot_t slotIndex,uint32 count)273*8c9b84a5SIngo Weinhold radix_leaf_dealloc(radix_node *leaf, radix_slot_t slotIndex, uint32 count)
274dcadb2baSIngo Weinhold {
275dcadb2baSIngo Weinhold uint32 n = slotIndex & (BITMAP_RADIX - 1);
276dcadb2baSIngo Weinhold bitmap_t mask = ((bitmap_t)-1 >> (BITMAP_RADIX - count - n))
277dcadb2baSIngo Weinhold & ((bitmap_t)-1 << n);
278dcadb2baSIngo Weinhold leaf->u.bitmap &= ~mask;
279dcadb2baSIngo Weinhold
280dcadb2baSIngo Weinhold leaf->big_hint = BITMAP_RADIX;
281dcadb2baSIngo Weinhold }
282dcadb2baSIngo Weinhold
283dcadb2baSIngo Weinhold
284dcadb2baSIngo Weinhold static void
radix_node_dealloc(radix_node * node,radix_slot_t slotIndex,uint32 count,uint32 radix,uint32 skip,radix_slot_t index)285*8c9b84a5SIngo Weinhold radix_node_dealloc(radix_node *node, radix_slot_t slotIndex, uint32 count,
286*8c9b84a5SIngo Weinhold uint32 radix, uint32 skip, radix_slot_t index)
287dcadb2baSIngo Weinhold {
288dcadb2baSIngo Weinhold node->u.available += count;
289dcadb2baSIngo Weinhold
290dcadb2baSIngo Weinhold uint32 next_skip = skip / NODE_RADIX;
291dcadb2baSIngo Weinhold radix /= NODE_RADIX;
292dcadb2baSIngo Weinhold
293dcadb2baSIngo Weinhold uint32 i = (slotIndex - index) / radix;
294dcadb2baSIngo Weinhold index += i * radix;
295dcadb2baSIngo Weinhold i = i * next_skip + 1;
296dcadb2baSIngo Weinhold
297dcadb2baSIngo Weinhold while (i <= skip && index < slotIndex + count) {
298dcadb2baSIngo Weinhold uint32 v = index + radix - slotIndex;
299dcadb2baSIngo Weinhold if (v > count)
300dcadb2baSIngo Weinhold v = count;
301dcadb2baSIngo Weinhold
302dcadb2baSIngo Weinhold if (next_skip == 1)
303dcadb2baSIngo Weinhold radix_leaf_dealloc(&node[i], slotIndex, v);
304dcadb2baSIngo Weinhold else
305dcadb2baSIngo Weinhold radix_node_dealloc(&node[i], slotIndex, v, radix,
306dcadb2baSIngo Weinhold next_skip - 1, index);
307dcadb2baSIngo Weinhold
308dcadb2baSIngo Weinhold if (node->big_hint < node[i].big_hint)
309dcadb2baSIngo Weinhold node->big_hint = node[i].big_hint;
310dcadb2baSIngo Weinhold
311dcadb2baSIngo Weinhold count -= v;
312dcadb2baSIngo Weinhold slotIndex += v;
313dcadb2baSIngo Weinhold index += radix;
314dcadb2baSIngo Weinhold i += next_skip;
315dcadb2baSIngo Weinhold }
316dcadb2baSIngo Weinhold }
317dcadb2baSIngo Weinhold
318dcadb2baSIngo Weinhold
319dcadb2baSIngo Weinhold void
radix_bitmap_dealloc(radix_bitmap * bmp,radix_slot_t slotIndex,uint32 count)320*8c9b84a5SIngo Weinhold radix_bitmap_dealloc(radix_bitmap *bmp, radix_slot_t slotIndex, uint32 count)
321dcadb2baSIngo Weinhold {
322dcadb2baSIngo Weinhold if (bmp->radix == BITMAP_RADIX)
323dcadb2baSIngo Weinhold radix_leaf_dealloc(bmp->root, slotIndex, count);
324dcadb2baSIngo Weinhold else
325dcadb2baSIngo Weinhold radix_node_dealloc(bmp->root, slotIndex, count, bmp->radix,
326dcadb2baSIngo Weinhold bmp->skip, 0);
327dcadb2baSIngo Weinhold
328dcadb2baSIngo Weinhold bmp->free_slots += count;
329dcadb2baSIngo Weinhold }
330dcadb2baSIngo Weinhold
331