1 /*-
2 * Copyright (c) 1998 Matthew Dillon. All Rights Reserved.
3 * Redistribution and use in source and binary forms, with or without
4 * modification, are permitted provided that the following conditions
5 * are met:
6 * 1. Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * 2. Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
11 * 4. Neither the name of the University nor the names of its contributors
12 * may be used to endorse or promote products derived from this software
13 * without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
19 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
21 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /*
29 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting
30 *
31 * This module implements a general bitmap allocator/deallocator. The
32 * allocator eats around 2 bits per 'block'. The module does not
33 * try to interpret the meaning of a 'block' other then to return
34 * SWAPBLK_NONE on an allocation failure.
35 *
36 * A radix tree is used to maintain the bitmap. Two radix constants are
37 * involved: One for the bitmaps contained in the leaf nodes (typically
38 * 32), and one for the meta nodes (typically 16). Both meta and leaf
39 * nodes have a hint field. This field gives us a hint as to the largest
40 * free contiguous range of blocks under the node. It may contain a
41 * value that is too high, but will never contain a value that is too
42 * low. When the radix tree is searched, allocation failures in subtrees
43 * update the hint.
44 *
45 * The radix tree also implements two collapsed states for meta nodes:
46 * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is
47 * in either of these two states, all information contained underneath
48 * the node is considered stale. These states are used to optimize
49 * allocation and freeing operations.
50 *
51 * The hinting greatly increases code efficiency for allocations while
52 * the general radix structure optimizes both allocations and frees. The
53 * radix tree should be able to operate well no matter how much
54 * fragmentation there is and no matter how large a bitmap is used.
55 *
56 * Unlike the rlist code, the blist code wires all necessary memory at
57 * creation time. Neither allocations nor frees require interaction with
58 * the memory subsystem. In contrast, the rlist code may allocate memory
59 * on an rlist_free() call. The non-blocking features of the blist code
60 * are used to great advantage in the swap code (vm/nswap_pager.c). The
61 * rlist code uses a little less overall memory then the blist code (but
62 * due to swap interleaving not all that much less), but the blist code
63 * scales much, much better.
64 *
65 * LAYOUT: The radix tree is layed out recursively using a
66 * linear array. Each meta node is immediately followed (layed out
67 * sequentially in memory) by BLIST_META_RADIX lower level nodes. This
68 * is a recursive structure but one that can be easily scanned through
69 * a very simple 'skip' calculation. In order to support large radixes,
70 * portions of the tree may reside outside our memory allocation. We
71 * handle this with an early-termination optimization (when bighint is
72 * set to -1) on the scan. The memory allocation is only large enough
73 * to cover the number of blocks requested at creation time even if it
74 * must be encompassed in larger root-node radix.
75 *
76 * NOTE: the allocator cannot currently allocate more then
77 * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too
78 * large' if you try. This is an area that could use improvement. The
79 * radix is large enough that this restriction does not effect the swap
80 * system, though. Currently only the allocation code is effected by
81 * this algorithmic unfeature. The freeing code can handle arbitrary
82 * ranges.
83 *
84 * This code can be compiled stand-alone for debugging.
85 */
86
87 /*
88 * NOTE: a few modifications is made in the orginal FreeBSD code:
89 * 1. change the name of some variables/constants
90 * 2. discard the code handling ALL_FREE and ALL_ALLOCATED state
91 * 3. allocate more than 32 slots won't lead to kernel panic
92 * 4. remove all the code for debugging
93 *
94 * Zhao Shuai
95 * upczhsh@163.com
96 */
97
98
99 #include <stdlib.h>
100 #include <util/RadixBitmap.h>
101
102 #define TERMINATOR -1
103
104
105 static uint32
radix_bitmap_init(radix_node * node,uint32 radix,uint32 skip,uint32 slots)106 radix_bitmap_init(radix_node *node, uint32 radix, uint32 skip, uint32 slots)
107 {
108 uint32 index = 0;
109 int32 big_hint = radix < slots ? radix : slots;
110
111 // leaf node
112 if (radix == BITMAP_RADIX) {
113 if (node) {
114 node->big_hint = big_hint;
115 if (big_hint == BITMAP_RADIX)
116 node->u.bitmap = 0;
117 else
118 node->u.bitmap = (bitmap_t)-1 << big_hint;
119 }
120 return index;
121 }
122
123 // not leaf node
124 if (node) {
125 node->big_hint = big_hint;
126 node->u.available = big_hint;
127 }
128
129 radix /= NODE_RADIX;
130 uint32 next_skip = skip / NODE_RADIX;
131
132 uint32 i;
133 for (i = 1; i <= skip; i += next_skip) {
134 if (slots >= radix) {
135 index = i + radix_bitmap_init(node ? &node[i] : NULL,
136 radix, next_skip - 1, radix);
137 slots -= radix;
138 } else if (slots > 0) {
139 index = i + radix_bitmap_init(node ? &node[i] : NULL,
140 radix, next_skip - 1, slots);
141 slots = 0;
142 } else { // add a terminator
143 if (node)
144 node[i].big_hint = TERMINATOR;
145 break;
146 }
147 }
148
149 if (index < i)
150 index = i;
151
152 return index;
153 }
154
155
156 radix_bitmap *
radix_bitmap_create(uint32 slots)157 radix_bitmap_create(uint32 slots)
158 {
159 uint32 radix = BITMAP_RADIX;
160 uint32 skip = 0;
161
162 while (radix < slots) {
163 radix *= NODE_RADIX;
164 skip = (skip + 1) * NODE_RADIX;
165 }
166
167 radix_bitmap *bmp = (radix_bitmap *)malloc(sizeof(radix_bitmap));
168 if (bmp == NULL)
169 return NULL;
170
171 bmp->radix = radix;
172 bmp->skip = skip;
173 bmp->free_slots = slots;
174 bmp->root_size = 1 + radix_bitmap_init(NULL, radix, skip, slots);
175
176 bmp->root = (radix_node *)malloc(bmp->root_size * sizeof(radix_node));
177 if (bmp->root == NULL) {
178 free(bmp);
179 return NULL;
180 }
181
182 radix_bitmap_init(bmp->root, radix, skip, slots);
183
184 return bmp;
185 }
186
187
188 void
radix_bitmap_destroy(radix_bitmap * bmp)189 radix_bitmap_destroy(radix_bitmap *bmp)
190 {
191 free(bmp->root);
192 free(bmp);
193 }
194
195
196 static radix_slot_t
radix_leaf_alloc(radix_node * leaf,radix_slot_t slotIndex,int32 count)197 radix_leaf_alloc(radix_node *leaf, radix_slot_t slotIndex, int32 count)
198 {
199 if (count <= (int32)BITMAP_RADIX) {
200 bitmap_t bitmap = ~leaf->u.bitmap;
201 uint32 n = BITMAP_RADIX - count;
202 bitmap_t mask = (bitmap_t)-1 >> n;
203 for (uint32 j = 0; j <= n; j++) {
204 if ((bitmap & mask) == mask) {
205 leaf->u.bitmap |= mask;
206 return (slotIndex + j);
207 }
208 mask <<= 1;
209 }
210 }
211
212 // we could not allocate count here, update big_hint
213 if (leaf->big_hint >= count)
214 leaf->big_hint = count - 1;
215 return RADIX_SLOT_NONE;
216 }
217
218
219 static radix_slot_t
radix_node_alloc(radix_node * node,radix_slot_t slotIndex,int32 count,uint32 radix,uint32 skip)220 radix_node_alloc(radix_node *node, radix_slot_t slotIndex, int32 count,
221 uint32 radix, uint32 skip)
222 {
223 uint32 next_skip = skip / NODE_RADIX;
224 radix /= NODE_RADIX;
225
226 for (uint32 i = 1; i <= skip; i += next_skip) {
227 if (node[i].big_hint == TERMINATOR) // TERMINATOR
228 break;
229
230 if (count <= node[i].big_hint) {
231 radix_slot_t addr = RADIX_SLOT_NONE;
232 if (next_skip == 1)
233 addr = radix_leaf_alloc(&node[i], slotIndex, count);
234 else
235 addr = radix_node_alloc(&node[i], slotIndex, count, radix,
236 next_skip - 1);
237 if (addr != RADIX_SLOT_NONE) {
238 node->u.available -= count;
239 if (node->big_hint > node->u.available)
240 node->big_hint = node->u.available;
241
242 return addr;
243 }
244 }
245 slotIndex += radix;
246 }
247
248 // we could not allocate count in the subtree, update big_hint
249 if (node->big_hint >= count)
250 node->big_hint = count - 1;
251 return RADIX_SLOT_NONE;
252 }
253
254
255 radix_slot_t
radix_bitmap_alloc(radix_bitmap * bmp,uint32 count)256 radix_bitmap_alloc(radix_bitmap *bmp, uint32 count)
257 {
258 radix_slot_t addr = RADIX_SLOT_NONE;
259
260 if (bmp->radix == BITMAP_RADIX)
261 addr = radix_leaf_alloc(bmp->root, 0, count);
262 else
263 addr = radix_node_alloc(bmp->root, 0, count, bmp->radix, bmp->skip);
264
265 if (addr != RADIX_SLOT_NONE)
266 bmp->free_slots -= count;
267
268 return addr;
269 }
270
271
272 static void
radix_leaf_dealloc(radix_node * leaf,radix_slot_t slotIndex,uint32 count)273 radix_leaf_dealloc(radix_node *leaf, radix_slot_t slotIndex, uint32 count)
274 {
275 uint32 n = slotIndex & (BITMAP_RADIX - 1);
276 bitmap_t mask = ((bitmap_t)-1 >> (BITMAP_RADIX - count - n))
277 & ((bitmap_t)-1 << n);
278 leaf->u.bitmap &= ~mask;
279
280 leaf->big_hint = BITMAP_RADIX;
281 }
282
283
284 static void
radix_node_dealloc(radix_node * node,radix_slot_t slotIndex,uint32 count,uint32 radix,uint32 skip,radix_slot_t index)285 radix_node_dealloc(radix_node *node, radix_slot_t slotIndex, uint32 count,
286 uint32 radix, uint32 skip, radix_slot_t index)
287 {
288 node->u.available += count;
289
290 uint32 next_skip = skip / NODE_RADIX;
291 radix /= NODE_RADIX;
292
293 uint32 i = (slotIndex - index) / radix;
294 index += i * radix;
295 i = i * next_skip + 1;
296
297 while (i <= skip && index < slotIndex + count) {
298 uint32 v = index + radix - slotIndex;
299 if (v > count)
300 v = count;
301
302 if (next_skip == 1)
303 radix_leaf_dealloc(&node[i], slotIndex, v);
304 else
305 radix_node_dealloc(&node[i], slotIndex, v, radix,
306 next_skip - 1, index);
307
308 if (node->big_hint < node[i].big_hint)
309 node->big_hint = node[i].big_hint;
310
311 count -= v;
312 slotIndex += v;
313 index += radix;
314 i += next_skip;
315 }
316 }
317
318
319 void
radix_bitmap_dealloc(radix_bitmap * bmp,radix_slot_t slotIndex,uint32 count)320 radix_bitmap_dealloc(radix_bitmap *bmp, radix_slot_t slotIndex, uint32 count)
321 {
322 if (bmp->radix == BITMAP_RADIX)
323 radix_leaf_dealloc(bmp->root, slotIndex, count);
324 else
325 radix_node_dealloc(bmp->root, slotIndex, count, bmp->radix,
326 bmp->skip, 0);
327
328 bmp->free_slots += count;
329 }
330
331