xref: /haiku/src/system/kernel/heap.cpp (revision 1294543de9ac0eff000eaea1b18368c36435d08e)
1 /*
2  * Copyright 2008-2010, Michael Lotz, mmlr@mlotz.ch.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <arch/debug.h>
12 #include <debug.h>
13 #include <elf.h>
14 #include <heap.h>
15 #include <int.h>
16 #include <kernel.h>
17 #include <lock.h>
18 #include <string.h>
19 #include <team.h>
20 #include <thread.h>
21 #include <tracing.h>
22 #include <util/AutoLock.h>
23 #include <vm/vm.h>
24 #include <vm/vm_page.h>
25 
26 
27 //#define TRACE_HEAP
28 #ifdef TRACE_HEAP
29 #	define TRACE(x) dprintf x
30 #else
31 #	define TRACE(x) ;
32 #endif
33 
34 
35 #if USE_SLAB_ALLOCATOR_FOR_MALLOC
36 #	undef KERNEL_HEAP_LEAK_CHECK
37 #endif
38 
39 
40 #if KERNEL_HEAP_LEAK_CHECK
41 typedef struct heap_leak_check_info_s {
42 	addr_t		caller;
43 	size_t		size;
44 	thread_id	thread;
45 	team_id		team;
46 } heap_leak_check_info;
47 
48 struct caller_info {
49 	addr_t		caller;
50 	uint32		count;
51 	uint32		size;
52 };
53 
54 static const int32 kCallerInfoTableSize = 1024;
55 static caller_info sCallerInfoTable[kCallerInfoTableSize];
56 static int32 sCallerInfoCount = 0;
57 #endif	// KERNEL_HEAP_LEAK_CHECK
58 
59 
60 typedef struct heap_page_s heap_page;
61 
62 
63 typedef struct heap_area_s {
64 	area_id			area;
65 
66 	addr_t			base;
67 	size_t			size;
68 
69 	uint32			page_count;
70 	uint32			free_page_count;
71 
72 	heap_page *		free_pages;
73 	heap_page *		page_table;
74 
75 	heap_area_s *	prev;
76 	heap_area_s *	next;
77 	heap_area_s *	all_next;
78 } heap_area;
79 
80 
81 #define MAX_BIN_COUNT	31	// limited by the 5 bit bin_index field below
82 
83 typedef struct heap_page_s {
84 	heap_area *		area;
85 	uint16			index;
86 	uint16			bin_index : 5;
87 	uint16			free_count : 10;
88 	uint16			in_use : 1;
89 	heap_page_s *	next;
90 	heap_page_s *	prev;
91 	union {
92 		uint16			empty_index;
93 		uint16			allocation_id; // used for bin == bin_count allocations
94 	};
95 	addr_t *		free_list;
96 } heap_page;
97 
98 
99 typedef struct heap_bin_s {
100 	mutex		lock;
101 	uint32		element_size;
102 	uint16		max_free_count;
103 	heap_page *	page_list; // sorted so that the desired page is always first
104 } heap_bin;
105 
106 
107 struct heap_allocator_s {
108 	rw_lock		area_lock;
109 	mutex		page_lock;
110 
111 	const char *name;
112 	uint32		bin_count;
113 	uint32		page_size;
114 
115 	uint32		total_pages;
116 	uint32		total_free_pages;
117 	uint32		empty_areas;
118 
119 #if KERNEL_HEAP_LEAK_CHECK
120 	addr_t		(*get_caller)();
121 #endif
122 
123 	heap_bin *	bins;
124 	heap_area *	areas; // sorted so that the desired area is always first
125 	heap_area *	all_areas; // all areas including full ones
126 };
127 
128 
129 static const uint32 kAreaAllocationMagic = 'AAMG';
130 typedef struct area_allocation_info_s {
131 	area_id		area;
132 	void *		base;
133 	uint32		magic;
134 	size_t		size;
135 	size_t		allocation_size;
136 	size_t		allocation_alignment;
137 	void *		allocation_base;
138 } area_allocation_info;
139 
140 
141 struct DeferredFreeListEntry : SinglyLinkedListLinkImpl<DeferredFreeListEntry> {
142 };
143 
144 
145 typedef SinglyLinkedList<DeferredFreeListEntry> DeferredFreeList;
146 typedef SinglyLinkedList<DeferredDeletable> DeferredDeletableList;
147 
148 
149 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
150 
151 #define VIP_HEAP_SIZE	(1024 * 1024)
152 
153 // Heap class configuration
154 #define HEAP_CLASS_COUNT 3
155 static const heap_class sHeapClasses[HEAP_CLASS_COUNT] = {
156 	{
157 		"small",					/* name */
158 		50,							/* initial percentage */
159 		B_PAGE_SIZE / 8,			/* max allocation size */
160 		B_PAGE_SIZE,				/* page size */
161 		8,							/* min bin size */
162 		4,							/* bin alignment */
163 		8,							/* min count per page */
164 		16							/* max waste per page */
165 	},
166 	{
167 		"medium",					/* name */
168 		30,							/* initial percentage */
169 		B_PAGE_SIZE * 2,			/* max allocation size */
170 		B_PAGE_SIZE * 8,			/* page size */
171 		B_PAGE_SIZE / 8,			/* min bin size */
172 		32,							/* bin alignment */
173 		4,							/* min count per page */
174 		64							/* max waste per page */
175 	},
176 	{
177 		"large",					/* name */
178 		20,							/* initial percentage */
179 		HEAP_AREA_USE_THRESHOLD,	/* max allocation size */
180 		B_PAGE_SIZE * 16,			/* page size */
181 		B_PAGE_SIZE * 2,			/* min bin size */
182 		128,						/* bin alignment */
183 		1,							/* min count per page */
184 		256							/* max waste per page */
185 	}
186 };
187 
188 
189 static uint32 sHeapCount;
190 static heap_allocator *sHeaps[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
191 static uint32 *sLastGrowRequest[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
192 static uint32 *sLastHandledGrowRequest[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
193 
194 static heap_allocator *sVIPHeap;
195 static heap_allocator *sGrowHeap = NULL;
196 static thread_id sHeapGrowThread = -1;
197 static sem_id sHeapGrowSem = -1;
198 static sem_id sHeapGrownNotify = -1;
199 static bool sAddGrowHeap = false;
200 
201 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
202 
203 static DeferredFreeList sDeferredFreeList;
204 static DeferredDeletableList sDeferredDeletableList;
205 static spinlock sDeferredFreeListLock;
206 
207 
208 
209 // #pragma mark - Tracing
210 
211 #if KERNEL_HEAP_TRACING
212 namespace KernelHeapTracing {
213 
214 class Allocate : public AbstractTraceEntry {
215 	public:
216 		Allocate(addr_t address, size_t size)
217 			:	fAddress(address),
218 				fSize(size)
219 		{
220 			Initialized();
221 		}
222 
223 		virtual void AddDump(TraceOutput &out)
224 		{
225 			out.Print("heap allocate: 0x%08lx (%lu bytes)", fAddress, fSize);
226 		}
227 
228 	private:
229 		addr_t	fAddress;
230 		size_t	fSize;
231 };
232 
233 
234 class Reallocate : public AbstractTraceEntry {
235 	public:
236 		Reallocate(addr_t oldAddress, addr_t newAddress, size_t newSize)
237 			:	fOldAddress(oldAddress),
238 				fNewAddress(newAddress),
239 				fNewSize(newSize)
240 		{
241 			Initialized();
242 		};
243 
244 		virtual void AddDump(TraceOutput &out)
245 		{
246 			out.Print("heap reallocate: 0x%08lx -> 0x%08lx (%lu bytes)",
247 				fOldAddress, fNewAddress, fNewSize);
248 		}
249 
250 	private:
251 		addr_t	fOldAddress;
252 		addr_t	fNewAddress;
253 		size_t	fNewSize;
254 };
255 
256 
257 class Free : public AbstractTraceEntry {
258 	public:
259 		Free(addr_t address)
260 			:	fAddress(address)
261 		{
262 			Initialized();
263 		};
264 
265 		virtual void AddDump(TraceOutput &out)
266 		{
267 			out.Print("heap free: 0x%08lx", fAddress);
268 		}
269 
270 	private:
271 		addr_t	fAddress;
272 };
273 
274 
275 } // namespace KernelHeapTracing
276 
277 #	define T(x)	if (!gKernelStartup) new(std::nothrow) KernelHeapTracing::x;
278 #else
279 #	define T(x)	;
280 #endif
281 
282 
283 // #pragma mark - Debug functions
284 
285 
286 #if KERNEL_HEAP_LEAK_CHECK
287 static addr_t
288 get_caller()
289 {
290 	// Find the first return address outside of the allocator code. Note that
291 	// this makes certain assumptions about how the code for the functions
292 	// ends up in the kernel object.
293 	addr_t returnAddresses[5];
294 	int32 depth = arch_debug_get_stack_trace(returnAddresses, 5, 0, 1,
295 		STACK_TRACE_KERNEL);
296 	for (int32 i = 0; i < depth; i++) {
297 		if (returnAddresses[i] < (addr_t)&get_caller
298 			|| returnAddresses[i] > (addr_t)&malloc_referenced_release) {
299 			return returnAddresses[i];
300 		}
301 	}
302 
303 	return 0;
304 }
305 #endif
306 
307 
308 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
309 
310 
311 static void
312 dump_page(heap_page *page)
313 {
314 	uint32 count = 0;
315 	for (addr_t *temp = page->free_list; temp != NULL; temp = (addr_t *)*temp)
316 		count++;
317 
318 	kprintf("\t\tpage %p: bin_index: %u; free_count: %u; empty_index: %u; "
319 		"free_list %p (%lu entr%s)\n", page, page->bin_index, page->free_count,
320 		page->empty_index, page->free_list, count, count == 1 ? "y" : "ies");
321 }
322 
323 
324 static void
325 dump_bin(heap_bin *bin)
326 {
327 	uint32 count = 0;
328 	for (heap_page *page = bin->page_list; page != NULL; page = page->next)
329 		count++;
330 
331 	kprintf("\telement_size: %lu; max_free_count: %u; page_list %p (%lu pages"
332 		");\n", bin->element_size, bin->max_free_count, bin->page_list, count);
333 
334 	for (heap_page *page = bin->page_list; page != NULL; page = page->next)
335 		dump_page(page);
336 }
337 
338 
339 static void
340 dump_bin_list(heap_allocator *heap)
341 {
342 	for (uint32 i = 0; i < heap->bin_count; i++)
343 		dump_bin(&heap->bins[i]);
344 	kprintf("\n");
345 }
346 
347 
348 static void
349 dump_allocator_areas(heap_allocator *heap)
350 {
351 	heap_area *area = heap->all_areas;
352 	while (area) {
353 		kprintf("\tarea %p: area: %ld; base: 0x%08lx; size: %lu; page_count: "
354 			"%lu; free_pages: %p (%lu entr%s)\n", area, area->area, area->base,
355 			area->size, area->page_count, area->free_pages,
356 			area->free_page_count, area->free_page_count == 1 ? "y" : "ies");
357 		area = area->all_next;
358 	}
359 
360 	kprintf("\n");
361 }
362 
363 
364 static void
365 dump_allocator(heap_allocator *heap, bool areas, bool bins)
366 {
367 	kprintf("allocator %p: name: %s; page_size: %lu; bin_count: %lu; pages: "
368 		"%lu; free_pages: %lu; empty_areas: %lu\n", heap, heap->name,
369 		heap->page_size, heap->bin_count, heap->total_pages,
370 		heap->total_free_pages, heap->empty_areas);
371 
372 	if (areas)
373 		dump_allocator_areas(heap);
374 	if (bins)
375 		dump_bin_list(heap);
376 }
377 
378 
379 static int
380 dump_heap_list(int argc, char **argv)
381 {
382 	if (argc == 2 && strcmp(argv[1], "grow") == 0) {
383 		// only dump dedicated grow heap info
384 		kprintf("dedicated grow heap:\n");
385 		dump_allocator(sGrowHeap, true, true);
386 		return 0;
387 	}
388 
389 	bool stats = false;
390 	int i = 1;
391 
392 	if (argc > 1 && strcmp(argv[1], "stats") == 0) {
393 		stats = true;
394 		i++;
395 	}
396 
397 	uint64 heapAddress = 0;
398 	if (i < argc && !evaluate_debug_expression(argv[i], &heapAddress, true)) {
399 		print_debugger_command_usage(argv[0]);
400 		return 0;
401 	}
402 
403 	if (heapAddress == 0) {
404 		// dump default kernel heaps
405 		for (uint32 i = 0; i < sHeapCount; i++)
406 			dump_allocator(sHeaps[i], !stats, !stats);
407 	} else {
408 		// dump specified heap
409 		dump_allocator((heap_allocator*)(addr_t)heapAddress, !stats, !stats);
410 	}
411 
412 	return 0;
413 }
414 
415 
416 #if !KERNEL_HEAP_LEAK_CHECK
417 
418 static int
419 dump_allocations(int argc, char **argv)
420 {
421 	uint64 heapAddress = 0;
422 	bool statsOnly = false;
423 	for (int32 i = 1; i < argc; i++) {
424 		if (strcmp(argv[i], "stats") == 0)
425 			statsOnly = true;
426 		else if (!evaluate_debug_expression(argv[i], &heapAddress, true)) {
427 			print_debugger_command_usage(argv[0]);
428 			return 0;
429 		}
430 	}
431 
432 	size_t totalSize = 0;
433 	uint32 totalCount = 0;
434 	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
435 		heap_allocator *heap = sHeaps[heapIndex];
436 		if (heapAddress != 0)
437 			heap = (heap_allocator *)(addr_t)heapAddress;
438 
439 		// go through all the pages in all the areas
440 		heap_area *area = heap->all_areas;
441 		while (area) {
442 			for (uint32 i = 0; i < area->page_count; i++) {
443 				heap_page *page = &area->page_table[i];
444 				if (!page->in_use)
445 					continue;
446 
447 				addr_t base = area->base + i * heap->page_size;
448 				if (page->bin_index < heap->bin_count) {
449 					// page is used by a small allocation bin
450 					uint32 elementCount = page->empty_index;
451 					size_t elementSize
452 						= heap->bins[page->bin_index].element_size;
453 					for (uint32 j = 0; j < elementCount;
454 							j++, base += elementSize) {
455 						// walk the free list to see if this element is in use
456 						bool elementInUse = true;
457 						for (addr_t *temp = page->free_list; temp != NULL;
458 								temp = (addr_t *)*temp) {
459 							if ((addr_t)temp == base) {
460 								elementInUse = false;
461 								break;
462 							}
463 						}
464 
465 						if (!elementInUse)
466 							continue;
467 
468 						if (!statsOnly) {
469 							kprintf("address: 0x%08lx; size: %lu bytes\n",
470 								base, elementSize);
471 						}
472 
473 						totalSize += elementSize;
474 						totalCount++;
475 					}
476 				} else {
477 					// page is used by a big allocation, find the page count
478 					uint32 pageCount = 1;
479 					while (i + pageCount < area->page_count
480 						&& area->page_table[i + pageCount].in_use
481 						&& area->page_table[i + pageCount].bin_index
482 							== heap->bin_count
483 						&& area->page_table[i + pageCount].allocation_id
484 							== page->allocation_id)
485 						pageCount++;
486 
487 					size_t size = pageCount * heap->page_size;
488 
489 					if (!statsOnly) {
490 						kprintf("address: 0x%08lx; size: %lu bytes\n", base,
491 							size);
492 					}
493 
494 					totalSize += size;
495 					totalCount++;
496 
497 					// skip the allocated pages
498 					i += pageCount - 1;
499 				}
500 			}
501 
502 			area = area->all_next;
503 		}
504 
505 		if (heapAddress != 0)
506 			break;
507 	}
508 
509 	kprintf("total allocations: %lu; total bytes: %lu\n", totalCount, totalSize);
510 	return 0;
511 }
512 
513 #else // !KERNEL_HEAP_LEAK_CHECK
514 
515 static int
516 dump_allocations(int argc, char **argv)
517 {
518 	team_id team = -1;
519 	thread_id thread = -1;
520 	addr_t caller = 0;
521 	addr_t address = 0;
522 	bool statsOnly = false;
523 
524 	for (int32 i = 1; i < argc; i++) {
525 		if (strcmp(argv[i], "team") == 0)
526 			team = parse_expression(argv[++i]);
527 		else if (strcmp(argv[i], "thread") == 0)
528 			thread = parse_expression(argv[++i]);
529 		else if (strcmp(argv[i], "caller") == 0)
530 			caller = parse_expression(argv[++i]);
531 		else if (strcmp(argv[i], "address") == 0)
532 			address = parse_expression(argv[++i]);
533 		else if (strcmp(argv[i], "stats") == 0)
534 			statsOnly = true;
535 		else {
536 			print_debugger_command_usage(argv[0]);
537 			return 0;
538 		}
539 	}
540 
541 	size_t totalSize = 0;
542 	uint32 totalCount = 0;
543 	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
544 		heap_allocator *heap = sHeaps[heapIndex];
545 
546 		// go through all the pages in all the areas
547 		heap_area *area = heap->all_areas;
548 		while (area) {
549 			heap_leak_check_info *info = NULL;
550 			for (uint32 i = 0; i < area->page_count; i++) {
551 				heap_page *page = &area->page_table[i];
552 				if (!page->in_use)
553 					continue;
554 
555 				addr_t base = area->base + i * heap->page_size;
556 				if (page->bin_index < heap->bin_count) {
557 					// page is used by a small allocation bin
558 					uint32 elementCount = page->empty_index;
559 					size_t elementSize
560 						= heap->bins[page->bin_index].element_size;
561 					for (uint32 j = 0; j < elementCount;
562 							j++, base += elementSize) {
563 						// walk the free list to see if this element is in use
564 						bool elementInUse = true;
565 						for (addr_t *temp = page->free_list; temp != NULL;
566 								temp = (addr_t *)*temp) {
567 							if ((addr_t)temp == base) {
568 								elementInUse = false;
569 								break;
570 							}
571 						}
572 
573 						if (!elementInUse)
574 							continue;
575 
576 						info = (heap_leak_check_info *)(base + elementSize
577 							- sizeof(heap_leak_check_info));
578 
579 						if ((team == -1 || info->team == team)
580 							&& (thread == -1 || info->thread == thread)
581 							&& (caller == 0 || info->caller == caller)
582 							&& (address == 0 || base == address)) {
583 							// interesting...
584 							if (!statsOnly) {
585 								kprintf("team: % 6ld; thread: % 6ld; "
586 									"address: 0x%08lx; size: %lu bytes; "
587 									"caller: %#lx\n", info->team, info->thread,
588 									base, info->size, info->caller);
589 							}
590 
591 							totalSize += info->size;
592 							totalCount++;
593 						}
594 					}
595 				} else {
596 					// page is used by a big allocation, find the page count
597 					uint32 pageCount = 1;
598 					while (i + pageCount < area->page_count
599 						&& area->page_table[i + pageCount].in_use
600 						&& area->page_table[i + pageCount].bin_index
601 							== heap->bin_count
602 						&& area->page_table[i + pageCount].allocation_id
603 							== page->allocation_id)
604 						pageCount++;
605 
606 					info = (heap_leak_check_info *)(base + pageCount
607 						* heap->page_size - sizeof(heap_leak_check_info));
608 
609 					if ((team == -1 || info->team == team)
610 						&& (thread == -1 || info->thread == thread)
611 						&& (caller == 0 || info->caller == caller)
612 						&& (address == 0 || base == address)) {
613 						// interesting...
614 						if (!statsOnly) {
615 							kprintf("team: % 6ld; thread: % 6ld;"
616 								" address: 0x%08lx; size: %lu bytes;"
617 								" caller: %#lx\n", info->team, info->thread,
618 								base, info->size, info->caller);
619 						}
620 
621 						totalSize += info->size;
622 						totalCount++;
623 					}
624 
625 					// skip the allocated pages
626 					i += pageCount - 1;
627 				}
628 			}
629 
630 			area = area->all_next;
631 		}
632 	}
633 
634 	kprintf("total allocations: %lu; total bytes: %lu\n", totalCount,
635 		totalSize);
636 	return 0;
637 }
638 
639 
640 static caller_info*
641 get_caller_info(addr_t caller)
642 {
643 	// find the caller info
644 	for (int32 i = 0; i < sCallerInfoCount; i++) {
645 		if (caller == sCallerInfoTable[i].caller)
646 			return &sCallerInfoTable[i];
647 	}
648 
649 	// not found, add a new entry, if there are free slots
650 	if (sCallerInfoCount >= kCallerInfoTableSize)
651 		return NULL;
652 
653 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
654 	info->caller = caller;
655 	info->count = 0;
656 	info->size = 0;
657 
658 	return info;
659 }
660 
661 
662 static int
663 caller_info_compare_size(const void* _a, const void* _b)
664 {
665 	const caller_info* a = (const caller_info*)_a;
666 	const caller_info* b = (const caller_info*)_b;
667 	return (int)(b->size - a->size);
668 }
669 
670 
671 static int
672 caller_info_compare_count(const void* _a, const void* _b)
673 {
674 	const caller_info* a = (const caller_info*)_a;
675 	const caller_info* b = (const caller_info*)_b;
676 	return (int)(b->count - a->count);
677 }
678 
679 
680 static bool
681 analyze_allocation_callers(heap_allocator *heap)
682 {
683 	// go through all the pages in all the areas
684 	heap_area *area = heap->all_areas;
685 	while (area) {
686 		heap_leak_check_info *info = NULL;
687 		for (uint32 i = 0; i < area->page_count; i++) {
688 			heap_page *page = &area->page_table[i];
689 			if (!page->in_use)
690 				continue;
691 
692 			addr_t base = area->base + i * heap->page_size;
693 			if (page->bin_index < heap->bin_count) {
694 				// page is used by a small allocation bin
695 				uint32 elementCount = page->empty_index;
696 				size_t elementSize = heap->bins[page->bin_index].element_size;
697 				for (uint32 j = 0; j < elementCount; j++, base += elementSize) {
698 					// walk the free list to see if this element is in use
699 					bool elementInUse = true;
700 					for (addr_t *temp = page->free_list; temp != NULL;
701 						temp = (addr_t *)*temp) {
702 						if ((addr_t)temp == base) {
703 							elementInUse = false;
704 							break;
705 						}
706 					}
707 
708 					if (!elementInUse)
709 						continue;
710 
711 					info = (heap_leak_check_info *)(base + elementSize
712 						- sizeof(heap_leak_check_info));
713 
714 					caller_info *callerInfo = get_caller_info(info->caller);
715 					if (callerInfo == NULL) {
716 						kprintf("out of space for caller infos\n");
717 						return false;
718 					}
719 
720 					callerInfo->count++;
721 					callerInfo->size += info->size;
722 				}
723 			} else {
724 				// page is used by a big allocation, find the page count
725 				uint32 pageCount = 1;
726 				while (i + pageCount < area->page_count
727 					&& area->page_table[i + pageCount].in_use
728 					&& area->page_table[i + pageCount].bin_index
729 						== heap->bin_count
730 					&& area->page_table[i + pageCount].allocation_id
731 						== page->allocation_id) {
732 					pageCount++;
733 				}
734 
735 				info = (heap_leak_check_info *)(base + pageCount
736 					* heap->page_size - sizeof(heap_leak_check_info));
737 
738 				caller_info *callerInfo = get_caller_info(info->caller);
739 				if (callerInfo == NULL) {
740 					kprintf("out of space for caller infos\n");
741 					return false;
742 				}
743 
744 				callerInfo->count++;
745 				callerInfo->size += info->size;
746 
747 				// skip the allocated pages
748 				i += pageCount - 1;
749 			}
750 		}
751 
752 		area = area->all_next;
753 	}
754 
755 	return true;
756 }
757 
758 
759 static int
760 dump_allocations_per_caller(int argc, char **argv)
761 {
762 	bool sortBySize = true;
763 	heap_allocator *heap = NULL;
764 
765 	for (int32 i = 1; i < argc; i++) {
766 		if (strcmp(argv[i], "-c") == 0) {
767 			sortBySize = false;
768 		} else if (strcmp(argv[i], "-h") == 0) {
769 			uint64 heapAddress;
770 			if (++i >= argc
771 				|| !evaluate_debug_expression(argv[i], &heapAddress, true)) {
772 				print_debugger_command_usage(argv[0]);
773 				return 0;
774 			}
775 
776 			heap = (heap_allocator*)(addr_t)heapAddress;
777 		} else {
778 			print_debugger_command_usage(argv[0]);
779 			return 0;
780 		}
781 	}
782 
783 	sCallerInfoCount = 0;
784 
785 	if (heap != NULL) {
786 		if (!analyze_allocation_callers(heap))
787 			return 0;
788 	} else {
789 		for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
790 			if (!analyze_allocation_callers(sHeaps[heapIndex]))
791 				return 0;
792 		}
793 	}
794 
795 	// sort the array
796 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
797 		sortBySize ? &caller_info_compare_size : &caller_info_compare_count);
798 
799 	kprintf("%ld different callers, sorted by %s...\n\n", sCallerInfoCount,
800 		sortBySize ? "size" : "count");
801 
802 	kprintf("     count        size      caller\n");
803 	kprintf("----------------------------------\n");
804 	for (int32 i = 0; i < sCallerInfoCount; i++) {
805 		caller_info& info = sCallerInfoTable[i];
806 		kprintf("%10ld  %10ld  %#08lx", info.count, info.size, info.caller);
807 
808 		const char *symbol;
809 		const char *imageName;
810 		bool exactMatch;
811 		addr_t baseAddress;
812 
813 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
814 				&imageName, &exactMatch) == B_OK) {
815 			kprintf("  %s + 0x%lx (%s)%s\n", symbol,
816 				info.caller - baseAddress, imageName,
817 				exactMatch ? "" : " (nearest)");
818 		} else
819 			kprintf("\n");
820 	}
821 
822 	return 0;
823 }
824 
825 #endif // KERNEL_HEAP_LEAK_CHECK
826 
827 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
828 
829 
830 #if PARANOID_HEAP_VALIDATION
831 static void
832 heap_validate_heap(heap_allocator *heap)
833 {
834 	ReadLocker areaReadLocker(heap->area_lock);
835 	for (uint32 i = 0; i < heap->bin_count; i++)
836 		mutex_lock(&heap->bins[i].lock);
837 	MutexLocker pageLocker(heap->page_lock);
838 
839 	uint32 totalPageCount = 0;
840 	uint32 totalFreePageCount = 0;
841 	heap_area *area = heap->all_areas;
842 	while (area != NULL) {
843 		// validate the free pages list
844 		uint32 freePageCount = 0;
845 		heap_page *lastPage = NULL;
846 		heap_page *page = area->free_pages;
847 		while (page) {
848 			if ((addr_t)page < (addr_t)&area->page_table[0]
849 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
850 				panic("free page is not part of the page table\n");
851 
852 			if (page->index >= area->page_count)
853 				panic("free page has invalid index\n");
854 
855 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
856 				panic("free page index does not lead to target page\n");
857 
858 			if (page->prev != lastPage)
859 				panic("free page entry has invalid prev link\n");
860 
861 			if (page->in_use)
862 				panic("free page marked as in use\n");
863 
864 			lastPage = page;
865 			page = page->next;
866 			freePageCount++;
867 		}
868 
869 		totalPageCount += freePageCount;
870 		totalFreePageCount += freePageCount;
871 		if (area->free_page_count != freePageCount)
872 			panic("free page count doesn't match free page list\n");
873 
874 		// validate the page table
875 		uint32 usedPageCount = 0;
876 		for (uint32 i = 0; i < area->page_count; i++) {
877 			if (area->page_table[i].in_use)
878 				usedPageCount++;
879 		}
880 
881 		totalPageCount += usedPageCount;
882 		if (freePageCount + usedPageCount != area->page_count) {
883 			panic("free pages and used pages do not add up (%lu + %lu != %lu)\n",
884 				freePageCount, usedPageCount, area->page_count);
885 		}
886 
887 		area = area->all_next;
888 	}
889 
890 	// validate the areas
891 	area = heap->areas;
892 	heap_area *lastArea = NULL;
893 	uint32 lastFreeCount = 0;
894 	while (area != NULL) {
895 		if (area->free_page_count < lastFreeCount)
896 			panic("size ordering of area list broken\n");
897 
898 		if (area->prev != lastArea)
899 			panic("area list entry has invalid prev link\n");
900 
901 		lastArea = area;
902 		lastFreeCount = area->free_page_count;
903 		area = area->next;
904 	}
905 
906 	lastArea = NULL;
907 	area = heap->all_areas;
908 	while (area != NULL) {
909 		if (lastArea != NULL && lastArea->base < area->base)
910 			panic("base ordering of all_areas list broken\n");
911 
912 		lastArea = area;
913 		area = area->all_next;
914 	}
915 
916 	// validate the bins
917 	for (uint32 i = 0; i < heap->bin_count; i++) {
918 		heap_bin *bin = &heap->bins[i];
919 		heap_page *lastPage = NULL;
920 		heap_page *page = bin->page_list;
921 		lastFreeCount = 0;
922 		while (page) {
923 			area = heap->all_areas;
924 			while (area) {
925 				if (area == page->area)
926 					break;
927 				area = area->all_next;
928 			}
929 
930 			if (area == NULL) {
931 				panic("page area not present in area list\n");
932 				page = page->next;
933 				continue;
934 			}
935 
936 			if ((addr_t)page < (addr_t)&area->page_table[0]
937 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
938 				panic("used page is not part of the page table\n");
939 
940 			if (page->index >= area->page_count)
941 				panic("used page has invalid index\n");
942 
943 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
944 				panic("used page index does not lead to target page\n");
945 
946 			if (page->prev != lastPage) {
947 				panic("used page entry has invalid prev link (%p vs %p bin "
948 					"%lu)\n", page->prev, lastPage, i);
949 			}
950 
951 			if (!page->in_use)
952 				panic("used page marked as not in use\n");
953 
954 			if (page->bin_index != i) {
955 				panic("used page with bin index %u in page list of bin %lu\n",
956 					page->bin_index, i);
957 			}
958 
959 			if (page->free_count < lastFreeCount)
960 				panic("ordering of bin page list broken\n");
961 
962 			// validate the free list
963 			uint32 freeSlotsCount = 0;
964 			addr_t *element = page->free_list;
965 			addr_t pageBase = area->base + page->index * heap->page_size;
966 			while (element) {
967 				if ((addr_t)element < pageBase
968 					|| (addr_t)element >= pageBase + heap->page_size)
969 					panic("free list entry out of page range\n");
970 
971 				if (((addr_t)element - pageBase) % bin->element_size != 0)
972 					panic("free list entry not on an element boundary\n");
973 
974 				element = (addr_t *)*element;
975 				freeSlotsCount++;
976 			}
977 
978 			uint32 slotCount = bin->max_free_count;
979 			if (page->empty_index > slotCount) {
980 				panic("empty index beyond slot count (%u with %lu slots)\n",
981 					page->empty_index, slotCount);
982 			}
983 
984 			freeSlotsCount += (slotCount - page->empty_index);
985 			if (freeSlotsCount > slotCount)
986 				panic("more free slots than fit into the page\n");
987 
988 			lastPage = page;
989 			lastFreeCount = page->free_count;
990 			page = page->next;
991 		}
992 	}
993 
994 	pageLocker.Unlock();
995 	for (uint32 i = 0; i < heap->bin_count; i++)
996 		mutex_unlock(&heap->bins[i].lock);
997 	areaReadLocker.Unlock();
998 }
999 #endif // PARANOID_HEAP_VALIDATION
1000 
1001 
1002 // #pragma mark - Heap functions
1003 
1004 
1005 void
1006 heap_add_area(heap_allocator *heap, area_id areaID, addr_t base, size_t size)
1007 {
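	// the passed in area is carved up into three parts: this heap_area
	// bookkeeping structure, a page table with one heap_page entry per heap
	// page, and finally the page aligned memory that is actually handed out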
1008 	heap_area *area = (heap_area *)base;
1009 	area->area = areaID;
1010 
1011 	base += sizeof(heap_area);
1012 	size -= sizeof(heap_area);
1013 
1014 	uint32 pageCount = size / heap->page_size;
1015 	size_t pageTableSize = pageCount * sizeof(heap_page);
1016 	area->page_table = (heap_page *)base;
1017 	base += pageTableSize;
1018 	size -= pageTableSize;
1019 
1020 	// the rest is now actually usable memory (rounded to the next page)
1021 	area->base = ROUNDUP(base, B_PAGE_SIZE);
1022 	area->size = size & ~(B_PAGE_SIZE - 1);
1023 
1024 	// now we know the real page count
1025 	pageCount = area->size / heap->page_size;
1026 	area->page_count = pageCount;
1027 
1028 	// zero out the page table and fill in page indexes
1029 	memset((void *)area->page_table, 0, pageTableSize);
1030 	for (uint32 i = 0; i < pageCount; i++) {
1031 		area->page_table[i].area = area;
1032 		area->page_table[i].index = i;
1033 	}
1034 
1035 	// link all pages into the free pages list
1036 	for (uint32 i = 1; i < pageCount; i++) {
1037 		area->page_table[i - 1].next = &area->page_table[i];
1038 		area->page_table[i].prev = &area->page_table[i - 1];
1039 	}
1040 	area->free_pages = &area->page_table[0];
1041 	area->free_page_count = pageCount;
1042 	area->page_table[0].prev = NULL;
1043 	area->next = NULL;
1044 
1045 	WriteLocker areaWriteLocker(heap->area_lock);
1046 	MutexLocker pageLocker(heap->page_lock);
1047 	if (heap->areas == NULL) {
1048 		// it's the only (empty) area in that heap
1049 		area->prev = NULL;
1050 		heap->areas = area;
1051 	} else {
1052 		// link in this area as the last one as it is completely empty
1053 		heap_area *lastArea = heap->areas;
1054 		while (lastArea->next != NULL)
1055 			lastArea = lastArea->next;
1056 
1057 		lastArea->next = area;
1058 		area->prev = lastArea;
1059 	}
1060 
1061 	// insert this area in the all_areas list so it stays ordered by descending base
1062 	if (heap->all_areas == NULL || heap->all_areas->base < area->base) {
1063 		area->all_next = heap->all_areas;
1064 		heap->all_areas = area;
1065 	} else {
1066 		heap_area *insert = heap->all_areas;
1067 		while (insert->all_next && insert->all_next->base > area->base)
1068 			insert = insert->all_next;
1069 
1070 		area->all_next = insert->all_next;
1071 		insert->all_next = area;
1072 	}
1073 
1074 	heap->total_pages += area->page_count;
1075 	heap->total_free_pages += area->free_page_count;
1076 
1077 	if (areaID >= 0) {
1078 		// this deletable area is still completely empty - the empty count
1079 		// will be decremented as soon as this area is used for the first time
1080 		heap->empty_areas++;
1081 	}
1082 
1083 	pageLocker.Unlock();
1084 	areaWriteLocker.Unlock();
1085 
1086 	dprintf("heap_add_area: area %ld added to %s heap %p - usable range 0x%08lx "
1087 		"- 0x%08lx\n", area->area, heap->name, heap, area->base,
1088 		area->base + area->size);
1089 }
1090 
1091 
1092 static status_t
1093 heap_remove_area(heap_allocator *heap, heap_area *area)
1094 {
1095 	if (area->free_page_count != area->page_count) {
1096 		panic("tried removing heap area that has still pages in use");
1097 		return B_ERROR;
1098 	}
1099 
1100 	if (area->prev == NULL && area->next == NULL) {
1101 		panic("tried removing the last non-full heap area");
1102 		return B_ERROR;
1103 	}
1104 
1105 	if (heap->areas == area)
1106 		heap->areas = area->next;
1107 	if (area->prev != NULL)
1108 		area->prev->next = area->next;
1109 	if (area->next != NULL)
1110 		area->next->prev = area->prev;
1111 
1112 	if (heap->all_areas == area)
1113 		heap->all_areas = area->all_next;
1114 	else {
1115 		heap_area *previous = heap->all_areas;
1116 		while (previous) {
1117 			if (previous->all_next == area) {
1118 				previous->all_next = area->all_next;
1119 				break;
1120 			}
1121 
1122 			previous = previous->all_next;
1123 		}
1124 
1125 		if (previous == NULL)
1126 			panic("removing heap area that is not in all list");
1127 	}
1128 
1129 	heap->total_pages -= area->page_count;
1130 	heap->total_free_pages -= area->free_page_count;
1131 
1132 	dprintf("heap_remove_area: area %ld with range 0x%08lx - 0x%08lx removed "
1133 		"from %s heap %p\n", area->area, area->base, area->base + area->size,
1134 		heap->name, heap);
1135 
1136 	return B_OK;
1137 }
1138 
1139 
1140 heap_allocator *
1141 heap_create_allocator(const char *name, addr_t base, size_t size,
1142 	const heap_class *heapClass, bool allocateOnHeap)
1143 {
1144 	heap_allocator *heap;
1145 	if (allocateOnHeap) {
1146 		// allocate separately on the heap
1147 		heap = (heap_allocator *)malloc(sizeof(heap_allocator)
1148 			+ sizeof(heap_bin) * MAX_BIN_COUNT);
1149 	} else {
1150 		// use up the first part of the area
1151 		heap = (heap_allocator *)base;
1152 		base += sizeof(heap_allocator);
1153 		size -= sizeof(heap_allocator);
1154 	}
1155 
1156 	heap->name = name;
1157 	heap->page_size = heapClass->page_size;
1158 	heap->total_pages = heap->total_free_pages = heap->empty_areas = 0;
1159 	heap->areas = heap->all_areas = NULL;
1160 	heap->bins = (heap_bin *)((addr_t)heap + sizeof(heap_allocator));
1161 
1162 #if KERNEL_HEAP_LEAK_CHECK
1163 	heap->get_caller = &get_caller;
1164 #endif
1165 
1166 	heap->bin_count = 0;
1167 	size_t binSize = 0, lastSize = 0;
1168 	uint32 count = heap->page_size / heapClass->min_bin_size;
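	// derive the bin element sizes from the heap class: starting at
	// min_bin_size, keep every size that is a multiple of bin_alignment and
	// wastes at most max_waste_per_page bytes of a heap page, as long as at
	// least min_count_per_page elements still fit onto a page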
1169 	for (; count >= heapClass->min_count_per_page; count--, lastSize = binSize) {
1170 		if (heap->bin_count >= MAX_BIN_COUNT)
1171 			panic("heap configuration invalid - max bin count reached\n");
1172 
1173 		binSize = (heap->page_size / count) & ~(heapClass->bin_alignment - 1);
1174 		if (binSize == lastSize)
1175 			continue;
1176 		if (heap->page_size - count * binSize > heapClass->max_waste_per_page)
1177 			continue;
1178 
1179 		heap_bin *bin = &heap->bins[heap->bin_count];
1180 		mutex_init(&bin->lock, "heap bin lock");
1181 		bin->element_size = binSize;
1182 		bin->max_free_count = heap->page_size / binSize;
1183 		bin->page_list = NULL;
1184 		heap->bin_count++;
1185 	}
1186 
1187 	if (!allocateOnHeap) {
1188 		base += heap->bin_count * sizeof(heap_bin);
1189 		size -= heap->bin_count * sizeof(heap_bin);
1190 	}
1191 
1192 	rw_lock_init(&heap->area_lock, "heap area rw lock");
1193 	mutex_init(&heap->page_lock, "heap page lock");
1194 
1195 	heap_add_area(heap, -1, base, size);
1196 	return heap;
1197 }
1198 
1199 
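// Expects the heap's page lock to be held. Keeps the heap->areas list ordered
// by ascending free page count, so that the most used area that still has
// room is always found first, and tracks areas that became completely empty.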
1200 static inline void
1201 heap_free_pages_added(heap_allocator *heap, heap_area *area, uint32 pageCount)
1202 {
1203 	area->free_page_count += pageCount;
1204 	heap->total_free_pages += pageCount;
1205 
1206 	if (area->free_page_count == pageCount) {
1207 		// we need to add ourselves to the area list of the heap
1208 		area->prev = NULL;
1209 		area->next = heap->areas;
1210 		if (area->next)
1211 			area->next->prev = area;
1212 		heap->areas = area;
1213 	} else {
1214 		// we might need to move back in the area list
1215 		if (area->next && area->next->free_page_count < area->free_page_count) {
1216 			// move ourselves so the list stays ordered
1217 			heap_area *insert = area->next;
1218 			while (insert->next
1219 				&& insert->next->free_page_count < area->free_page_count)
1220 				insert = insert->next;
1221 
1222 			if (area->prev)
1223 				area->prev->next = area->next;
1224 			if (area->next)
1225 				area->next->prev = area->prev;
1226 			if (heap->areas == area)
1227 				heap->areas = area->next;
1228 
1229 			area->prev = insert;
1230 			area->next = insert->next;
1231 			if (area->next)
1232 				area->next->prev = area;
1233 			insert->next = area;
1234 		}
1235 	}
1236 
1237 	if (area->free_page_count == area->page_count && area->area >= 0)
1238 		heap->empty_areas++;
1239 }
1240 
1241 
1242 static inline void
1243 heap_free_pages_removed(heap_allocator *heap, heap_area *area, uint32 pageCount)
1244 {
1245 	if (area->free_page_count == area->page_count && area->area >= 0) {
1246 		// this area was completely empty
1247 		heap->empty_areas--;
1248 	}
1249 
1250 	area->free_page_count -= pageCount;
1251 	heap->total_free_pages -= pageCount;
1252 
1253 	if (area->free_page_count == 0) {
1254 		// the area is now full so we remove it from the area list
1255 		if (area->prev)
1256 			area->prev->next = area->next;
1257 		if (area->next)
1258 			area->next->prev = area->prev;
1259 		if (heap->areas == area)
1260 			heap->areas = area->next;
1261 		area->next = area->prev = NULL;
1262 	} else {
1263 		// we might need to move forward in the area list
1264 		if (area->prev && area->prev->free_page_count > area->free_page_count) {
1265 			// move ourselves so the list stays ordered
1266 			heap_area *insert = area->prev;
1267 			while (insert->prev
1268 				&& insert->prev->free_page_count > area->free_page_count)
1269 				insert = insert->prev;
1270 
1271 			if (area->prev)
1272 				area->prev->next = area->next;
1273 			if (area->next)
1274 				area->next->prev = area->prev;
1275 
1276 			area->prev = insert->prev;
1277 			area->next = insert;
1278 			if (area->prev)
1279 				area->prev->next = area;
1280 			if (heap->areas == insert)
1281 				heap->areas = area;
1282 			insert->prev = area;
1283 		}
1284 	}
1285 }
1286 
1287 
1288 static inline void
1289 heap_link_page(heap_page *page, heap_page **list)
1290 {
1291 	page->prev = NULL;
1292 	page->next = *list;
1293 	if (page->next)
1294 		page->next->prev = page;
1295 	*list = page;
1296 }
1297 
1298 
1299 static inline void
1300 heap_unlink_page(heap_page *page, heap_page **list)
1301 {
1302 	if (page->prev)
1303 		page->prev->next = page->next;
1304 	if (page->next)
1305 		page->next->prev = page->prev;
1306 	if (list && *list == page) {
1307 		*list = page->next;
1308 		if (page->next)
1309 			page->next->prev = NULL;
1310 	}
1311 }
1312 
1313 
1314 static heap_page *
1315 heap_allocate_contiguous_pages(heap_allocator *heap, uint32 pageCount,
1316 	size_t alignment)
1317 {
1318 	MutexLocker pageLocker(heap->page_lock);
1319 	heap_area *area = heap->areas;
1320 	while (area) {
1321 		if (area->free_page_count < pageCount) {
1322 			area = area->next;
1323 			continue;
1324 		}
1325 
1326 		uint32 step = 1;
1327 		uint32 firstValid = 0;
1328 		const uint32 lastValid = area->page_count - pageCount + 1;
1329 
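		// when an alignment larger than the heap page size is requested only
		// page indexes whose start address is aligned accordingly can begin
		// an allocation, e.g. a 16kB alignment on a heap with 4kB pages
		// checks every 4th page starting at the first aligned one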
1330 		if (alignment > heap->page_size) {
1331 			firstValid = (ROUNDUP(area->base, alignment) - area->base)
1332 				/ heap->page_size;
1333 			step = alignment / heap->page_size;
1334 		}
1335 
1336 		int32 first = -1;
1337 		for (uint32 i = firstValid; i < lastValid; i += step) {
1338 			if (area->page_table[i].in_use)
1339 				continue;
1340 
1341 			first = i;
1342 
1343 			for (uint32 j = 1; j < pageCount; j++) {
1344 				if (area->page_table[i + j].in_use) {
1345 					first = -1;
1346 					i += j / step * step;
1347 					break;
1348 				}
1349 			}
1350 
1351 			if (first >= 0)
1352 				break;
1353 		}
1354 
1355 		if (first < 0) {
1356 			area = area->next;
1357 			continue;
1358 		}
1359 
1360 		for (uint32 i = first; i < first + pageCount; i++) {
1361 			heap_page *page = &area->page_table[i];
1362 			page->in_use = 1;
1363 			page->bin_index = heap->bin_count;
1364 
1365 			heap_unlink_page(page, &area->free_pages);
1366 
1367 			page->next = page->prev = NULL;
1368 			page->free_list = NULL;
1369 			page->allocation_id = (uint16)first;
1370 		}
1371 
1372 		heap_free_pages_removed(heap, area, pageCount);
1373 		return &area->page_table[first];
1374 	}
1375 
1376 	return NULL;
1377 }
1378 
1379 
1380 #if KERNEL_HEAP_LEAK_CHECK
1381 static void
1382 heap_add_leak_check_info(heap_allocator *heap, addr_t address, size_t allocated,
1383 	size_t size)
1384 {
1385 	heap_leak_check_info *info = (heap_leak_check_info *)(address + allocated
1386 		- sizeof(heap_leak_check_info));
1387 	info->size = size - sizeof(heap_leak_check_info);
1388 	info->thread = (gKernelStartup ? 0 : thread_get_current_thread_id());
1389 	info->team = (gKernelStartup ? 0 : team_get_current_team_id());
1390 	info->caller = heap->get_caller();
1391 }
1392 #endif
1393 
1394 
1395 static void *
1396 heap_raw_alloc(heap_allocator *heap, size_t size, size_t alignment)
1397 {
1398 	TRACE(("heap %p: allocate %lu bytes from raw pages with alignment %lu\n",
1399 		heap, size, alignment));
1400 
1401 	uint32 pageCount = (size + heap->page_size - 1) / heap->page_size;
1402 	heap_page *firstPage = heap_allocate_contiguous_pages(heap, pageCount,
1403 		alignment);
1404 	if (firstPage == NULL) {
1405 		TRACE(("heap %p: found no contiguous pages to allocate %ld bytes\n",
1406 			heap, size));
1407 		return NULL;
1408 	}
1409 
1410 	addr_t address = firstPage->area->base + firstPage->index * heap->page_size;
1411 #if KERNEL_HEAP_LEAK_CHECK
1412 	heap_add_leak_check_info(heap, address, pageCount * heap->page_size, size);
1413 #endif
1414 	return (void *)address;
1415 }
1416 
1417 
1418 static void *
1419 heap_allocate_from_bin(heap_allocator *heap, uint32 binIndex, size_t size)
1420 {
1421 	heap_bin *bin = &heap->bins[binIndex];
1422 	TRACE(("heap %p: allocate %lu bytes from bin %lu with element_size %lu\n",
1423 		heap, size, binIndex, bin->element_size));
1424 
1425 	MutexLocker binLocker(bin->lock);
1426 	heap_page *page = bin->page_list;
1427 	if (page == NULL) {
1428 		MutexLocker pageLocker(heap->page_lock);
1429 		heap_area *area = heap->areas;
1430 		if (area == NULL) {
1431 			TRACE(("heap %p: no free pages to allocate %lu bytes\n", heap,
1432 				size));
1433 			return NULL;
1434 		}
1435 
1436 		// by design there are only areas in the list that still have
1437 		// free pages available
1438 		page = area->free_pages;
1439 		area->free_pages = page->next;
1440 		if (page->next)
1441 			page->next->prev = NULL;
1442 
1443 		heap_free_pages_removed(heap, area, 1);
1444 
1445 		if (page->in_use)
1446 			panic("got an in use page %p from the free pages list\n", page);
1447 		page->in_use = 1;
1448 
1449 		pageLocker.Unlock();
1450 
1451 		page->bin_index = binIndex;
1452 		page->free_count = bin->max_free_count;
1453 		page->empty_index = 0;
1454 		page->free_list = NULL;
1455 		page->next = page->prev = NULL;
1456 		bin->page_list = page;
1457 	}
1458 
1459 	// we have a page where we have a free slot
1460 	void *address = NULL;
1461 	if (page->free_list) {
1462 		// there's a previously freed entry we can use
1463 		address = page->free_list;
1464 		page->free_list = (addr_t *)*page->free_list;
1465 	} else {
1466 		// the page hasn't been fully allocated so use the next empty_index
1467 		address = (void *)(page->area->base + page->index * heap->page_size
1468 			+ page->empty_index * bin->element_size);
1469 		page->empty_index++;
1470 	}
1471 
1472 	page->free_count--;
1473 	if (page->free_count == 0) {
1474 		// the page is now full so we remove it from the page_list
1475 		bin->page_list = page->next;
1476 		if (page->next)
1477 			page->next->prev = NULL;
1478 		page->next = page->prev = NULL;
1479 	}
1480 
1481 #if KERNEL_HEAP_LEAK_CHECK
1482 	binLocker.Unlock();
1483 	heap_add_leak_check_info(heap, (addr_t)address, bin->element_size, size);
1484 #endif
1485 	return address;
1486 }
1487 
1488 
1489 static bool
1490 is_valid_alignment(size_t number)
1491 {
1492 	// this cryptic line accepts zero and all powers of two
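	// For example, with a 32 bit size_t and number == 8: ~8 + 1 == 0xfffffff8
	// and (8 << 1) - 1 == 0x0000000f, so the OR yields all ones. For a
	// non-power of two like 6 the OR is 0xfffffffb and the test fails.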
1493 	return ((~number + 1) | ((number << 1) - 1)) == ~0UL;
1494 }
1495 
1496 
1497 inline bool
1498 heap_should_grow(heap_allocator *heap)
1499 {
1500 	// suggest growing if there is less than 20% of a grow size available
1501 	return heap->total_free_pages * heap->page_size < HEAP_GROW_SIZE / 5;
1502 }
1503 
1504 
1505 void *
1506 heap_memalign(heap_allocator *heap, size_t alignment, size_t size)
1507 {
1508 	TRACE(("memalign(alignment = %lu, size = %lu)\n", alignment, size));
1509 
1510 #if DEBUG
1511 	if (!is_valid_alignment(alignment))
1512 		panic("memalign() with an alignment which is not a power of 2\n");
1513 #endif
1514 
1515 #if KERNEL_HEAP_LEAK_CHECK
1516 	size += sizeof(heap_leak_check_info);
1517 #endif
1518 
1519 	void *address = NULL;
1520 	if (alignment < B_PAGE_SIZE) {
1521 		if (alignment != 0) {
1522 			// TODO: The alignment is done by ensuring that the element size
1523 			// of the target bin is aligned with the requested alignment. This
1524 			// has the problem that it wastes space because a better (smaller)
1525 			// bin could possibly be selected. We should pick the best bin and
1526 			// check if there is an aligned block in the free list or if a new
1527 			// (page aligned) page has to be allocated anyway.
1528 			size = ROUNDUP(size, alignment);
1529 			for (uint32 i = 0; i < heap->bin_count; i++) {
1530 				if (size <= heap->bins[i].element_size
1531 					&& is_valid_alignment(heap->bins[i].element_size)) {
1532 					address = heap_allocate_from_bin(heap, i, size);
1533 					break;
1534 				}
1535 			}
1536 		} else {
1537 			for (uint32 i = 0; i < heap->bin_count; i++) {
1538 				if (size <= heap->bins[i].element_size) {
1539 					address = heap_allocate_from_bin(heap, i, size);
1540 					break;
1541 				}
1542 			}
1543 		}
1544 	}
1545 
1546 	if (address == NULL)
1547 		address = heap_raw_alloc(heap, size, alignment);
1548 
1549 #if KERNEL_HEAP_LEAK_CHECK
1550 	size -= sizeof(heap_leak_check_info);
1551 #endif
1552 
1553 	TRACE(("memalign(): asked to allocate %lu bytes, returning pointer %p\n",
1554 		size, address));
1555 
1556 	T(Allocate((addr_t)address, size));
1557 	if (address == NULL)
1558 		return address;
1559 
1560 #if PARANOID_KERNEL_MALLOC
1561 	memset(address, 0xcc, size);
1562 #endif
1563 
1564 #if PARANOID_KERNEL_FREE
1565 	// make sure 0xdeadbeef is cleared if we do not overwrite the memory
1566 	// and the user does not clear it
1567 	if (((uint32 *)address)[1] == 0xdeadbeef)
1568 		((uint32 *)address)[1] = 0xcccccccc;
1569 #endif
1570 
1571 	return address;
1572 }
1573 
1574 
1575 status_t
1576 heap_free(heap_allocator *heap, void *address)
1577 {
1578 	if (address == NULL)
1579 		return B_OK;
1580 
1581 	ReadLocker areaReadLocker(heap->area_lock);
1582 	heap_area *area = heap->all_areas;
1583 	while (area) {
1584 		// since the all_areas list is ordered by base with the biggest
1585 		// base at the top, we need only find the first area with a base
1586 		// not larger than our address to become our only candidate for freeing
1587 		if (area->base <= (addr_t)address) {
1588 			if ((addr_t)address >= area->base + area->size) {
1589 				// none of the other areas can contain the address as the list
1590 				// is ordered
1591 				return B_ENTRY_NOT_FOUND;
1592 			}
1593 
1594 			// this area contains the allocation, we're done searching
1595 			break;
1596 		}
1597 
1598 		area = area->all_next;
1599 	}
1600 
1601 	if (area == NULL) {
1602 		// this address does not belong to us
1603 		return B_ENTRY_NOT_FOUND;
1604 	}
1605 
1606 	TRACE(("free(): asked to free pointer %p\n", address));
1607 
1608 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1609 		/ heap->page_size];
1610 
1611 	TRACE(("free(): page %p: bin_index %d, free_count %d\n", page,
1612 		page->bin_index, page->free_count));
1613 
1614 	if (page->bin_index > heap->bin_count) {
1615 		panic("free(): page %p: invalid bin_index %d\n", page, page->bin_index);
1616 		return B_ERROR;
1617 	}
1618 
1619 	if (page->bin_index < heap->bin_count) {
1620 		// small allocation
1621 		heap_bin *bin = &heap->bins[page->bin_index];
1622 
1623 #if PARANOID_KERNEL_FREE
1624 		if (((uint32 *)address)[1] == 0xdeadbeef) {
1625 			// This block looks like it was freed already, walk the free list
1626 			// on this page to make sure this address doesn't exist.
1627 			MutexLocker binLocker(bin->lock);
1628 			for (addr_t *temp = page->free_list; temp != NULL;
1629 					temp = (addr_t *)*temp) {
1630 				if (temp == address) {
1631 					panic("free(): address %p already exists in page free "
1632 						"list\n", address);
1633 					return B_ERROR;
1634 				}
1635 			}
1636 		}
1637 
1638 		// the first 4 bytes are overwritten with the next free list pointer
1639 		// later
1640 		uint32 *dead = (uint32 *)address;
1641 		for (uint32 i = 1; i < bin->element_size / sizeof(uint32); i++)
1642 			dead[i] = 0xdeadbeef;
1643 #endif
1644 
1645 		MutexLocker binLocker(bin->lock);
1646 		if (((addr_t)address - area->base - page->index
1647 			* heap->page_size) % bin->element_size != 0) {
1648 			panic("free(): passed invalid pointer %p supposed to be in bin for "
1649 				"element size %ld\n", address, bin->element_size);
1650 			return B_ERROR;
1651 		}
1652 
1653 		// add the address to the page free list
1654 		*(addr_t *)address = (addr_t)page->free_list;
1655 		page->free_list = (addr_t *)address;
1656 		page->free_count++;
1657 
1658 		if (page->free_count == bin->max_free_count) {
1659 			// we are now empty, remove the page from the bin list
1660 			MutexLocker pageLocker(heap->page_lock);
1661 			heap_unlink_page(page, &bin->page_list);
1662 			page->in_use = 0;
1663 			heap_link_page(page, &area->free_pages);
1664 			heap_free_pages_added(heap, area, 1);
1665 		} else if (page->free_count == 1) {
1666 			// we need to add ourselves to the page list of the bin
1667 			heap_link_page(page, &bin->page_list);
1668 		} else {
1669 			// we might need to move back in the bin's page list
1670 			if (page->next && page->next->free_count < page->free_count) {
1671 				// move ourselves so the list stays ordered
1672 				heap_page *insert = page->next;
1673 				while (insert->next
1674 					&& insert->next->free_count < page->free_count)
1675 					insert = insert->next;
1676 
1677 				heap_unlink_page(page, &bin->page_list);
1678 
1679 				page->prev = insert;
1680 				page->next = insert->next;
1681 				if (page->next)
1682 					page->next->prev = page;
1683 				insert->next = page;
1684 			}
1685 		}
1686 	} else {
1687 		// large allocation, just return the pages to the page free list
1688 		uint32 allocationID = page->allocation_id;
1689 		uint32 maxPages = area->page_count - page->index;
1690 		uint32 pageCount = 0;
1691 
1692 		MutexLocker pageLocker(heap->page_lock);
1693 		for (uint32 i = 0; i < maxPages; i++) {
1694 			// loop until we find the end of this allocation
1695 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1696 				|| page[i].allocation_id != allocationID)
1697 				break;
1698 
1699 			// this page still belongs to the same allocation
1700 			page[i].in_use = 0;
1701 			page[i].allocation_id = 0;
1702 
1703 			// return it to the free list
1704 			heap_link_page(&page[i], &area->free_pages);
1705 			pageCount++;
1706 		}
1707 
1708 		heap_free_pages_added(heap, area, pageCount);
1709 	}
1710 
1711 	T(Free((addr_t)address));
1712 	areaReadLocker.Unlock();
1713 
1714 	if (heap->empty_areas > 1) {
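		// always keep one completely empty area around as a reserve; any
		// further empty (and thus deletable) areas are deleted below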
1715 		WriteLocker areaWriteLocker(heap->area_lock);
1716 		MutexLocker pageLocker(heap->page_lock);
1717 
1718 		area_id areasToDelete[heap->empty_areas - 1];
1719 		int32 areasToDeleteIndex = 0;
1720 
1721 		area = heap->areas;
1722 		while (area != NULL && heap->empty_areas > 1) {
1723 			heap_area *next = area->next;
1724 			if (area->area >= 0
1725 				&& area->free_page_count == area->page_count
1726 				&& heap_remove_area(heap, area) == B_OK) {
1727 				areasToDelete[areasToDeleteIndex++] = area->area;
1728 				heap->empty_areas--;
1729 			}
1730 
1731 			area = next;
1732 		}
1733 
1734 		pageLocker.Unlock();
1735 		areaWriteLocker.Unlock();
1736 
1737 		for (int32 i = 0; i < areasToDeleteIndex; i++)
1738 			delete_area(areasToDelete[i]);
1739 	}
1740 
1741 	return B_OK;
1742 }
1743 
1744 
1745 #if KERNEL_HEAP_LEAK_CHECK
1746 extern "C" void
1747 heap_set_get_caller(heap_allocator* heap, addr_t (*getCaller)())
1748 {
1749 	heap->get_caller = getCaller;
1750 }
1751 #endif
1752 
1753 
1754 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
1755 
1756 
1757 static status_t
1758 heap_realloc(heap_allocator *heap, void *address, void **newAddress,
1759 	size_t newSize)
1760 {
1761 	ReadLocker areaReadLocker(heap->area_lock);
1762 	heap_area *area = heap->all_areas;
1763 	while (area) {
1764 		// since the all_areas list is ordered by base with the biggest
1765 		// base at the top, we need only find the first area with a base
1766 		// not larger than our address to become our only candidate for
1767 		// reallocating
1768 		if (area->base <= (addr_t)address) {
1769 			if ((addr_t)address >= area->base + area->size) {
1770 				// none of the other areas can contain the address as the list
1771 				// is ordered
1772 				return B_ENTRY_NOT_FOUND;
1773 			}
1774 
1775 			// this area contains the allocation, we're done searching
1776 			break;
1777 		}
1778 
1779 		area = area->all_next;
1780 	}
1781 
1782 	if (area == NULL) {
1783 		// this address does not belong to us
1784 		return B_ENTRY_NOT_FOUND;
1785 	}
1786 
1787 	TRACE(("realloc(address = %p, newSize = %lu)\n", address, newSize));
1788 
1789 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1790 		/ heap->page_size];
1791 	if (page->bin_index > heap->bin_count) {
1792 		panic("realloc(): page %p: invalid bin_index %d\n", page,
1793 			page->bin_index);
1794 		return B_ERROR;
1795 	}
1796 
1797 	// find out the size of the old allocation first
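	// for small allocations the reusable range is bounded by the bin's
	// element size above and the next smaller bin's element size below; for
	// large allocations it is bounded by the number of pages spanned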
1798 	size_t minSize = 0;
1799 	size_t maxSize = 0;
1800 	if (page->bin_index < heap->bin_count) {
1801 		// this was a small allocation
1802 		heap_bin *bin = &heap->bins[page->bin_index];
1803 		maxSize = bin->element_size;
1804 		if (page->bin_index > 0)
1805 			minSize = heap->bins[page->bin_index - 1].element_size + 1;
1806 	} else {
1807 		// this was a large allocation
1808 		uint32 allocationID = page->allocation_id;
1809 		uint32 maxPages = area->page_count - page->index;
1810 		maxSize = heap->page_size;
1811 
1812 		MutexLocker pageLocker(heap->page_lock);
1813 		for (uint32 i = 1; i < maxPages; i++) {
1814 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1815 				|| page[i].allocation_id != allocationID)
1816 				break;
1817 
1818 			minSize += heap->page_size;
1819 			maxSize += heap->page_size;
1820 		}
1821 	}
1822 
1823 	areaReadLocker.Unlock();
1824 
1825 #if KERNEL_HEAP_LEAK_CHECK
1826 	newSize += sizeof(heap_leak_check_info);
1827 #endif
1828 
1829 	// does the new allocation simply fit in the old allocation?
1830 	if (newSize > minSize && newSize <= maxSize) {
1831 #if KERNEL_HEAP_LEAK_CHECK
1832 		// update the size info (the info is at the end so stays where it is)
1833 		heap_leak_check_info *info = (heap_leak_check_info *)((addr_t)address
1834 			+ maxSize - sizeof(heap_leak_check_info));
1835 		info->size = newSize - sizeof(heap_leak_check_info);
1836 		newSize -= sizeof(heap_leak_check_info);
1837 #endif
1838 
1839 		T(Reallocate((addr_t)address, (addr_t)address, newSize));
1840 		*newAddress = address;
1841 		return B_OK;
1842 	}
1843 
1844 #if KERNEL_HEAP_LEAK_CHECK
1845 	// new leak check info will be created with the malloc below
1846 	newSize -= sizeof(heap_leak_check_info);
1847 #endif
1848 
1849 	// if not, allocate a new chunk of memory
1850 	*newAddress = memalign(0, newSize);
1851 	T(Reallocate((addr_t)address, (addr_t)*newAddress, newSize));
1852 	if (*newAddress == NULL) {
1853 		// the new allocation failed, but the request was handled - the caller gets NULL
1854 		return B_OK;
1855 	}
1856 
1857 	// copy the old data and free the old allocation
1858 	memcpy(*newAddress, address, min_c(maxSize, newSize));
1859 	heap_free(heap, address);
1860 	return B_OK;
1861 }
1862 
1863 
1864 inline uint32
1865 heap_index_for(size_t size, int32 cpu)
1866 {
1867 #if KERNEL_HEAP_LEAK_CHECK
1868 	// take the extra info size into account
1869 	size += sizeof(heap_leak_check_info_s);
1870 #endif
1871 
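	// pick the smallest heap class whose maximum allocation size fits the
	// request and map it to the per-CPU instance of that class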
1872 	uint32 index = 0;
1873 	for (; index < HEAP_CLASS_COUNT - 1; index++) {
1874 		if (size <= sHeapClasses[index].max_allocation_size)
1875 			break;
1876 	}
1877 
1878 	return (index + cpu * HEAP_CLASS_COUNT) % sHeapCount;
1879 }
1880 
1881 
1882 static void *
1883 memalign_nogrow(size_t alignment, size_t size)
1884 {
1885 	// use dedicated memory in the grow thread by default
1886 	if (thread_get_current_thread_id() == sHeapGrowThread) {
1887 		void *result = heap_memalign(sGrowHeap, alignment, size);
1888 		if (!sAddGrowHeap && heap_should_grow(sGrowHeap)) {
1889 			// hopefully the grower will manage to add a new grow heap area
1890 			// before this dedicated memory runs out...
1891 			dprintf("heap: requesting new grow heap\n");
1892 			sAddGrowHeap = true;
1893 			release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
1894 		}
1895 
1896 		if (result != NULL)
1897 			return result;
1898 	}
1899 
1900 	// try the public heaps; there might still be something available
1901 	void *result = NULL;
1902 	int32 cpuCount = MIN(smp_get_num_cpus(),
1903 		(int32)sHeapCount / HEAP_CLASS_COUNT);
1904 	int32 cpuNumber = smp_get_current_cpu();
1905 	for (int32 i = 0; i < cpuCount; i++) {
1906 		uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount);
1907 		heap_allocator *heap = sHeaps[heapIndex];
1908 		result = heap_memalign(heap, alignment, size);
1909 		if (result != NULL)
1910 			return result;
1911 	}
1912 
1913 	// no memory available
1914 	if (thread_get_current_thread_id() == sHeapGrowThread)
1915 		panic("heap: all heaps have run out of memory while growing\n");
1916 	else
1917 		dprintf("heap: all heaps have run out of memory\n");
1918 
1919 	return NULL;
1920 }
1921 
1922 
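// Creates a fully locked kernel area of the given size and hands it to the
// heap as an additional area to allocate from.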
1923 static status_t
1924 heap_create_new_heap_area(heap_allocator *heap, const char *name, size_t size)
1925 {
1926 	void *address = NULL;
1927 	area_id heapArea = create_area(name, &address,
1928 		B_ANY_KERNEL_BLOCK_ADDRESS, size, B_FULL_LOCK,
1929 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1930 	if (heapArea < B_OK) {
1931 		TRACE(("heap: couldn't allocate heap area \"%s\"\n", name));
1932 		return heapArea;
1933 	}
1934 
1935 	heap_add_area(heap, heapArea, (addr_t)address, size);
1936 #if PARANOID_HEAP_VALIDATION
1937 	heap_validate_heap(heap);
1938 #endif
1939 	return B_OK;
1940 }
1941 
1942 
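// Main loop of the grower thread: waits on sHeapGrowSem, adds another area
// to the dedicated grow heap when requested, grows every heap that either
// explicitly asked for it or reports itself as nearly full, and finally
// wakes all threads blocked on sHeapGrownNotify.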
1943 static int32
1944 heap_grow_thread(void *)
1945 {
1946 	while (true) {
1947 		// wait for a request to grow one of the heaps
1948 		if (acquire_sem(sHeapGrowSem) < B_OK)
1949 			continue;
1950 
1951 		if (sAddGrowHeap) {
1952 			// the grow heap is about to run full; add another area to it
1953 			// to make some room
1954 			TRACE(("heap_grower: grow heaps will run out of memory soon\n"));
1955 			if (heap_create_new_heap_area(sGrowHeap, "additional grow heap",
1956 					HEAP_DEDICATED_GROW_SIZE) != B_OK)
1957 				dprintf("heap_grower: failed to create new grow heap area\n");
1958 		}
1959 
1960 		for (uint32 i = 0; i < sHeapCount; i++) {
1961 			heap_allocator *heap = sHeaps[i];
1962 			if (sLastGrowRequest[i] > sLastHandledGrowRequest[i]
1963 				|| heap_should_grow(heap)) {
1964 				// grow this heap if it is nearly full or if a grow was
1965 				// explicitly requested for this heap (happens when a large
1966 				// allocation cannot be fulfilled due to lack of contiguous
1967 				// pages)
1968 				if (heap_create_new_heap_area(heap, "additional heap",
1969 						HEAP_GROW_SIZE) != B_OK)
1970 					dprintf("heap_grower: failed to create new heap area\n");
1971 				sLastHandledGrowRequest[i] = sLastGrowRequest[i];
1972 			}
1973 		}
1974 
1975 		// notify anyone waiting for this request
1976 		release_sem_etc(sHeapGrownNotify, -1, B_RELEASE_ALL);
1977 	}
1978 
1979 	return 0;
1980 }
1981 
1982 
1983 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
1984 
1985 
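// Kernel daemon callback: moves the blocks queued via deferred_free() and
// the objects queued via deferred_delete() to local lists and disposes of
// them after the spinlock has been released.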
1986 static void
1987 deferred_deleter(void *arg, int iteration)
1988 {
1989 	// move entries and deletables to on-stack lists
1990 	InterruptsSpinLocker locker(sDeferredFreeListLock);
1991 	if (sDeferredFreeList.IsEmpty() && sDeferredDeletableList.IsEmpty())
1992 		return;
1993 
1994 	DeferredFreeList entries;
1995 	entries.MoveFrom(&sDeferredFreeList);
1996 
1997 	DeferredDeletableList deletables;
1998 	deletables.MoveFrom(&sDeferredDeletableList);
1999 
2000 	locker.Unlock();
2001 
2002 	// free the entries
2003 	while (DeferredFreeListEntry* entry = entries.RemoveHead())
2004 		free(entry);
2005 
2006 	// delete the deletables
2007 	while (DeferredDeletable* deletable = deletables.RemoveHead())
2008 		delete deletable;
2009 }
2010 
2011 
2012 //	#pragma mark -
2013 
2014 
2015 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
2016 
2017 
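// Splits the initial heap range into one heap per heap class, sized by
// each class' initial_percentage, and registers the heap debugger commands.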
2018 status_t
2019 heap_init(addr_t base, size_t size)
2020 {
2021 	for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) {
2022 		size_t partSize = size * sHeapClasses[i].initial_percentage / 100;
2023 		sHeaps[i] = heap_create_allocator(sHeapClasses[i].name, base, partSize,
2024 			&sHeapClasses[i], false);
2025 		sLastGrowRequest[i] = sLastHandledGrowRequest[i] = 0;
2026 		base += partSize;
2027 		sHeapCount++;
2028 	}
2029 
2030 	// set up some debug commands
2031 	add_debugger_command_etc("heap", &dump_heap_list,
2032 		"Dump infos about the kernel heap(s)",
2033 		"[(\"grow\" | \"stats\" | <heap>)]\n"
2034 		"Dump infos about the kernel heap(s). If \"grow\" is specified, only\n"
2035 		"infos about the dedicated grow heap are printed. If \"stats\" is\n"
2036 		"given as the argument, currently only the heap count is printed.\n"
2037 		"If <heap> is given, it is interpreted as the address of the heap to\n"
2038 		"print infos about.\n", 0);
2039 #if !KERNEL_HEAP_LEAK_CHECK
2040 	add_debugger_command_etc("allocations", &dump_allocations,
2041 		"Dump current heap allocations",
2042 		"[\"stats\"] [<heap>]\n"
2043 		"If no parameters are given, all current alloactions are dumped.\n"
2044 		"If the optional argument \"stats\" is specified, only the allocation\n"
2045 		"counts and no individual allocations are printed\n"
2046 		"If a specific heap address is given, only allocations of this\n"
2047 		"allocator are dumped\n", 0);
2048 #else // !KERNEL_HEAP_LEAK_CHECK
2049 	add_debugger_command_etc("allocations", &dump_allocations,
2050 		"Dump current heap allocations",
2051 		"[(\"team\" | \"thread\") <id>] [\"caller\" <address>] [\"address\" <address>] [\"stats\"]\n"
2052 		"If no parameters are given, all current alloactions are dumped.\n"
2053 		"If \"team\", \"thread\", \"caller\", and/or \"address\" is specified as the first\n"
2054 		"argument, only allocations matching the team ID, thread ID, caller\n"
2055 		"address or allocated address given in the second argument are printed.\n"
2056 		"If the optional argument \"stats\" is specified, only the allocation\n"
2057 		"counts and no individual allocations are printed\n", 0);
2058 	add_debugger_command_etc("allocations_per_caller",
2059 		&dump_allocations_per_caller,
2060 		"Dump current heap allocations summed up per caller",
2061 		"[ \"-c\" ] [ -h <heap> ]\n"
2062 		"The current allocations will by summed up by caller (their count and\n"
2063 		"size) printed in decreasing order by size or, if \"-c\" is\n"
2064 		"specified, by allocation count. If given <heap> specifies the\n"
2065 		"address of the heap for which to print the allocations.\n", 0);
2066 #endif // KERNEL_HEAP_LEAK_CHECK
2067 	return B_OK;
2068 }
2069 
2070 
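// Creates the dedicated grow heap that backs the grower thread and
// memalign_nogrow(), as well as the small VIP heap used for
// HEAP_PRIORITY_VIP allocations.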
2071 status_t
2072 heap_init_post_area()
2073 {
2074 	void *address = NULL;
2075 	area_id growHeapArea = create_area("dedicated grow heap", &address,
2076 		B_ANY_KERNEL_BLOCK_ADDRESS, HEAP_DEDICATED_GROW_SIZE, B_FULL_LOCK,
2077 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2078 	if (growHeapArea < 0) {
2079 		panic("heap_init_post_area(): couldn't allocate dedicate grow heap "
2080 			"area");
2081 		return growHeapArea;
2082 	}
2083 
2084 	sGrowHeap = heap_create_allocator("grow", (addr_t)address,
2085 		HEAP_DEDICATED_GROW_SIZE, &sHeapClasses[0], false);
2086 	if (sGrowHeap == NULL) {
2087 		panic("heap_init_post_area(): failed to create dedicated grow heap\n");
2088 		return B_ERROR;
2089 	}
2090 
2091 	// create the VIP heap
2092 	static const heap_class heapClass = {
2093 		"VIP I/O",					/* name */
2094 		100,						/* initial percentage */
2095 		B_PAGE_SIZE / 8,			/* max allocation size */
2096 		B_PAGE_SIZE,				/* page size */
2097 		8,							/* min bin size */
2098 		4,							/* bin alignment */
2099 		8,							/* min count per page */
2100 		16							/* max waste per page */
2101 	};
2102 
2103 	area_id vipHeapArea = create_area("VIP heap", &address,
2104 		B_ANY_KERNEL_ADDRESS, VIP_HEAP_SIZE, B_FULL_LOCK,
2105 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2106 	if (vipHeapArea < 0) {
2107 		panic("heap_init_post_area(): couldn't allocate VIP heap area");
2108 		return B_ERROR;
2109 	}
2110 
2111 	sVIPHeap = heap_create_allocator("VIP heap", (addr_t)address,
2112 		VIP_HEAP_SIZE, &heapClass, false);
2113 	if (sVIPHeap == NULL) {
2114 		panic("heap_init_post_area(): failed to create VIP heap\n");
2115 		return B_ERROR;
2116 	}
2117 
2118 	dprintf("heap_init_post_area(): created VIP heap: %p\n", sVIPHeap);
2119 
2120 	return B_OK;
2121 }
2122 
2123 
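// Blocks the address ranges that the 0xcccccccc and 0xdeadbeef fill
// patterns point into, so dereferencing uninitialized or freed heap memory
// faults instead of silently hitting mapped pages, and creates the
// semaphores used to communicate with the grower thread.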
2124 status_t
2125 heap_init_post_sem()
2126 {
2127 #if PARANOID_KERNEL_MALLOC
2128 	vm_block_address_range("uninitialized heap memory",
2129 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
2130 #endif
2131 #if PARANOID_KERNEL_FREE
2132 	vm_block_address_range("freed heap memory",
2133 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
2134 #endif
2135 
2136 	sHeapGrowSem = create_sem(0, "heap_grow_sem");
2137 	if (sHeapGrowSem < 0) {
2138 		panic("heap_init_post_sem(): failed to create heap grow sem\n");
2139 		return B_ERROR;
2140 	}
2141 
2142 	sHeapGrownNotify = create_sem(0, "heap_grown_notify");
2143 	if (sHeapGrownNotify < 0) {
2144 		panic("heap_init_post_sem(): failed to create heap grown notify sem\n");
2145 		return B_ERROR;
2146 	}
2147 
2148 	return B_OK;
2149 }
2150 
2151 
2152 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
2153 
2154 
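// Spawns the grower thread, creates an additional set of per-class heaps
// for every CPU beyond the first as long as there is enough memory (about
// one set per 60Ki pages, i.e. roughly 240 MB with 4 KiB pages), and
// registers the deferred deleter with the kernel daemon.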
2155 status_t
2156 heap_init_post_thread()
2157 {
2158 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
2159 	sHeapGrowThread = spawn_kernel_thread(heap_grow_thread, "heap grower",
2160 		B_URGENT_PRIORITY, NULL);
2161 	if (sHeapGrowThread < 0) {
2162 		panic("heap_init_post_thread(): cannot create heap grow thread\n");
2163 		return sHeapGrowThread;
2164 	}
2165 
2166 	// create per-cpu heaps if there's enough memory
2167 	int32 heapCount = MIN(smp_get_num_cpus(),
2168 		(int32)vm_page_num_pages() / 60 / 1024);
2169 	for (int32 i = 1; i < heapCount; i++) {
2170 		addr_t base = 0;
2171 		size_t size = HEAP_GROW_SIZE * HEAP_CLASS_COUNT;
2172 		area_id perCPUHeapArea = create_area("per cpu initial heap",
2173 			(void **)&base, B_ANY_KERNEL_ADDRESS, size, B_FULL_LOCK,
2174 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2175 		if (perCPUHeapArea < 0)
2176 			break;
2177 
2178 		for (uint32 j = 0; j < HEAP_CLASS_COUNT; j++) {
2179 			int32 heapIndex = i * HEAP_CLASS_COUNT + j;
2180 			size_t partSize = size * sHeapClasses[j].initial_percentage / 100;
2181 			sHeaps[heapIndex] = heap_create_allocator(sHeapClasses[j].name,
2182 				base, partSize, &sHeapClasses[j], false);
2183 			sLastGrowRequest[heapIndex] = 0;
2184 			sLastHandledGrowRequest[heapIndex] = 0;
2185 			base += partSize;
2186 			sHeapCount++;
2187 		}
2188 	}
2189 
2190 	resume_thread(sHeapGrowThread);
2191 
2192 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
2193 
2194 	// run the deferred deleter roughly once a second
2195 	if (register_kernel_daemon(deferred_deleter, NULL, 10) != B_OK)
2196 		panic("heap_init_post_thread(): failed to init deferred deleter");
2197 
2198 	return B_OK;
2199 }
2200 
2201 
2202 //	#pragma mark - Public API
2203 
2204 
2205 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
2206 
2207 
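// Public allocator entry point. Requests above HEAP_AREA_USE_THRESHOLD get
// their own area with an area_allocation_info header in front; everything
// else is served from the per-CPU heap matching the size class. If all
// heaps are exhausted, an urgent grow request is queued and the allocation
// is retried once the grower signals completion.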
2208 void *
2209 memalign(size_t alignment, size_t size)
2210 {
2211 	if (!gKernelStartup && !are_interrupts_enabled()) {
2212 		panic("memalign(): called with interrupts disabled\n");
2213 		return NULL;
2214 	}
2215 
2216 	if (!gKernelStartup && size > HEAP_AREA_USE_THRESHOLD) {
2217 		// don't even attempt such a huge allocation - use areas instead
2218 		size_t areaSize = ROUNDUP(size + sizeof(area_allocation_info)
2219 			+ alignment, B_PAGE_SIZE);
2220 		if (areaSize < size) {
2221 			// the size overflowed
2222 			return NULL;
2223 		}
2224 
2225 		void *address = NULL;
2226 		area_id allocationArea = create_area("memalign area", &address,
2227 			B_ANY_KERNEL_BLOCK_ADDRESS, areaSize, B_FULL_LOCK,
2228 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2229 		if (allocationArea < B_OK) {
2230 			dprintf("heap: failed to create area for huge allocation\n");
2231 			return NULL;
2232 		}
2233 
2234 		area_allocation_info *info = (area_allocation_info *)address;
2235 		info->magic = kAreaAllocationMagic;
2236 		info->area = allocationArea;
2237 		info->base = address;
2238 		info->size = areaSize;
2239 		info->allocation_size = size;
2240 		info->allocation_alignment = alignment;
2241 
2242 		address = (void *)((addr_t)address + sizeof(area_allocation_info));
2243 		if (alignment != 0) {
2244 			address = (void *)ROUNDUP((addr_t)address, alignment);
2245 			ASSERT((addr_t)address % alignment == 0);
2246 			ASSERT((addr_t)address + size - 1 < (addr_t)info + areaSize - 1);
2247 		}
2248 
2249 		TRACE(("heap: allocated area %ld for huge allocation of %lu bytes\n",
2250 			allocationArea, size));
2251 
2252 		info->allocation_base = address;
2253 
2254 #if PARANOID_KERNEL_MALLOC
2255 		memset(address, 0xcc, size);
2256 #endif
2257 		return address;
2258 	}
2259 
2260 	void *result = NULL;
2261 	bool shouldGrow = false;
2262 	int32 cpuCount = MIN(smp_get_num_cpus(),
2263 		(int32)sHeapCount / HEAP_CLASS_COUNT);
2264 	int32 cpuNumber = smp_get_current_cpu();
2265 	for (int32 i = 0; i < cpuCount; i++) {
2266 		uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount);
2267 		heap_allocator *heap = sHeaps[heapIndex];
2268 		result = heap_memalign(heap, alignment, size);
2269 		if (result != NULL) {
2270 			shouldGrow = heap_should_grow(heap);
2271 			break;
2272 		}
2273 
2274 #if PARANOID_HEAP_VALIDATION
2275 		heap_validate_heap(heap);
2276 #endif
2277 	}
2278 
2279 	if (result == NULL) {
2280 		// request an urgent grow and wait - we don't do it ourselves here to
2281 		// serialize growing through the grow thread, as otherwise multiple
2282 		// threads hitting this situation (likely when memory ran out) would
2283 		// all add areas
2284 		uint32 heapIndex = heap_index_for(size, smp_get_current_cpu());
2285 		sLastGrowRequest[heapIndex]++;
2286 		switch_sem(sHeapGrowSem, sHeapGrownNotify);
2287 
2288 		// and then try again
2289 		result = heap_memalign(sHeaps[heapIndex], alignment, size);
2290 	} else if (shouldGrow) {
2291 		// should grow sometime soon, notify the grower
2292 		release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
2293 	}
2294 
2295 	if (result == NULL)
2296 		panic("heap: kernel heap has run out of memory\n");
2297 	return result;
2298 }
2299 
2300 
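// Flag-aware variant: HEAP_PRIORITY_VIP requests are served from the VIP
// heap, and requests that must not wait for memory or lock kernel space
// bypass the grow machinery via memalign_nogrow().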
2301 void *
2302 memalign_etc(size_t alignment, size_t size, uint32 flags)
2303 {
2304 	if ((flags & HEAP_PRIORITY_VIP) != 0)
2305 		return heap_memalign(sVIPHeap, alignment, size);
2306 
2307 	if ((flags & (HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE))
2308 			!= 0) {
2309 		return memalign_nogrow(alignment, size);
2310 	}
2311 
2312 	return memalign(alignment, size);
2313 }
2314 
2315 
2316 void
2317 free_etc(void *address, uint32 flags)
2318 {
2319 	if ((flags & HEAP_PRIORITY_VIP) != 0)
2320 		heap_free(sVIPHeap, address);
2321 	else
2322 		free(address);
2323 }
2324 
2325 
2326 void *
2327 malloc(size_t size)
2328 {
2329 	return memalign(0, size);
2330 }
2331 
2332 
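// Returns the address to whichever allocator owns it: the regular heaps
// (starting with the current CPU's set), the dedicated grow heap, the VIP
// heap, or, for huge allocations, the backing area, which is simply deleted.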
2333 void
2334 free(void *address)
2335 {
2336 	if (!gKernelStartup && !are_interrupts_enabled()) {
2337 		panic("free(): called with interrupts disabled\n");
2338 		return;
2339 	}
2340 
2341 	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2342 	for (uint32 i = 0; i < sHeapCount; i++) {
2343 		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2344 		if (heap_free(heap, address) == B_OK) {
2345 #if PARANOID_HEAP_VALIDATION
2346 			heap_validate_heap(heap);
2347 #endif
2348 			return;
2349 		}
2350 	}
2351 
2352 	// maybe it was allocated from the dedicated grow heap
2353 	if (heap_free(sGrowHeap, address) == B_OK)
2354 		return;
2355 
2356 	// or maybe it was allocated from the VIP heap
2357 	if (heap_free(sVIPHeap, address) == B_OK)
2358 		return;
2359 
2360 	// or maybe it was a huge allocation using an area
2361 	area_info areaInfo;
2362 	area_id area = area_for(address);
2363 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2364 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2365 
2366 		// just make extra sure it was allocated by us
2367 		if (info->magic == kAreaAllocationMagic && info->area == area
2368 			&& info->size == areaInfo.size && info->base == areaInfo.address
2369 			&& info->allocation_size < areaInfo.size) {
2370 			delete_area(area);
2371 			TRACE(("free(): freed huge allocation by deleting area %ld\n",
2372 				area));
2373 			return;
2374 		}
2375 	}
2376 
2377 	panic("free(): free failed for address %p\n", address);
2378 }
2379 
2380 
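// Resizes an allocation, preferring to satisfy the request in place (within
// the existing bin, page run or area) and falling back to
// allocate-copy-free otherwise.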
2381 void *
2382 realloc(void *address, size_t newSize)
2383 {
2384 	if (!gKernelStartup && !are_interrupts_enabled()) {
2385 		panic("realloc(): called with interrupts disabled\n");
2386 		return NULL;
2387 	}
2388 
2389 	if (address == NULL)
2390 		return memalign(0, newSize);
2391 
2392 	if (newSize == 0) {
2393 		free(address);
2394 		return NULL;
2395 	}
2396 
2397 	void *newAddress = NULL;
2398 	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2399 	for (uint32 i = 0; i < sHeapCount; i++) {
2400 		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2401 		if (heap_realloc(heap, address, &newAddress, newSize) == B_OK) {
2402 #if PARANOID_HEAP_VALIDATION
2403 			heap_validate_heap(heap);
2404 #endif
2405 			return newAddress;
2406 		}
2407 	}
2408 
2409 	// maybe it was allocated from the dedicated grow heap
2410 	if (heap_realloc(sGrowHeap, address, &newAddress, newSize) == B_OK)
2411 		return newAddress;
2412 
2413 	// or maybe it was a huge allocation using an area
2414 	area_info areaInfo;
2415 	area_id area = area_for(address);
2416 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2417 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2418 
2419 		// just make extra sure it was allocated by us
2420 		if (info->magic == kAreaAllocationMagic && info->area == area
2421 			&& info->size == areaInfo.size && info->base == areaInfo.address
2422 			&& info->allocation_size < areaInfo.size) {
2423 			size_t available = info->size - ((addr_t)info->allocation_base
2424 				- (addr_t)info->base);
2425 
2426 			if (available >= newSize) {
2427 				// there is enough room available for the newSize
2428 				TRACE(("realloc(): new size %ld fits in old area %ld with %ld "
2429 					"available\n", newSize, area, available));
2430 				info->allocation_size = newSize;
2431 				return address;
2432 			}
2433 
2434 			// have to allocate/copy/free - TODO maybe resize the area instead?
2435 			newAddress = memalign(0, newSize);
2436 			if (newAddress == NULL) {
2437 				dprintf("realloc(): failed to allocate new block of %ld bytes\n",
2438 					newSize);
2439 				return NULL;
2440 			}
2441 
2442 			memcpy(newAddress, address, min_c(newSize, info->allocation_size));
2443 			delete_area(area);
2444 			TRACE(("realloc(): allocated new block %p for size %ld and deleted "
2445 				"old area %ld\n", newAddress, newSize, area));
2446 			return newAddress;
2447 		}
2448 	}
2449 
2450 	panic("realloc(): failed to realloc address %p to size %lu\n", address,
2451 		newSize);
2452 	return NULL;
2453 }
2454 
2455 
2456 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
2457 
2458 
2459 void *
2460 calloc(size_t numElements, size_t size)
2461 {
	// reject requests where numElements * size would overflow size_t
	if (size != 0 && numElements * size / size != numElements)
		return NULL;

2462 	void *address = memalign(0, numElements * size);
2463 	if (address != NULL)
2464 		memset(address, 0, numElements * size);
2465 
2466 	return address;
2467 }
2468 
2469 
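// Queues a block to be freed later by the deferred deleter; the list entry
// is constructed in place inside the block itself (placement new), so no
// additional memory is needed here.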
2470 void
2471 deferred_free(void *block)
2472 {
2473 	if (block == NULL)
2474 		return;
2475 
2476 	DeferredFreeListEntry *entry = new(block) DeferredFreeListEntry;
2477 
2478 	InterruptsSpinLocker _(sDeferredFreeListLock);
2479 	sDeferredFreeList.Add(entry);
2480 }
2481 
2482 
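// The malloc_referenced*() helpers prepend an int32 reference count to the
// allocation and hand out the memory right behind it: acquire() increments
// the count, release() decrements it and eventually frees the block.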
2483 void *
2484 malloc_referenced(size_t size)
2485 {
2486 	int32 *referencedData = (int32 *)malloc(size + 4);
2487 	if (referencedData == NULL)
2488 		return NULL;
2489 
2490 	*referencedData = 1;
2491 	return referencedData + 1;
2492 }
2493 
2494 
2495 void *
2496 malloc_referenced_acquire(void *data)
2497 {
2498 	if (data != NULL) {
2499 		int32 *referencedData = (int32 *)data - 1;
2500 		atomic_add(referencedData, 1);
2501 	}
2502 
2503 	return data;
2504 }
2505 
2506 
2507 void
2508 malloc_referenced_release(void *data)
2509 {
2510 	if (data == NULL)
2511 		return;
2512 
2513 	int32 *referencedData = (int32 *)data - 1;
2514 	if (atomic_add(referencedData, -1) < 1)
2515 		free(referencedData);
2516 }
2517 
2518 
2519 DeferredDeletable::~DeferredDeletable()
2520 {
2521 }
2522 
2523 
2524 void
2525 deferred_delete(DeferredDeletable *deletable)
2526 {
2527 	if (deletable == NULL)
2528 		return;
2529 
2530 	InterruptsSpinLocker _(sDeferredFreeListLock);
2531 	sDeferredDeletableList.Add(deletable);
2532 }
2533