xref: /haiku/src/system/kernel/heap.cpp (revision a5bf12376daeded4049521eb17a6cc41192250d9)
1 /*
2  * Copyright 2008-2010, Michael Lotz, mmlr@mlotz.ch.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <arch/debug.h>
12 #include <debug.h>
13 #include <elf.h>
14 #include <heap.h>
15 #include <int.h>
16 #include <kernel.h>
17 #include <lock.h>
18 #include <string.h>
19 #include <team.h>
20 #include <thread.h>
21 #include <tracing.h>
22 #include <util/AutoLock.h>
23 #include <vm/vm.h>
24 #include <vm/vm_page.h>
25 
26 
27 //#define TRACE_HEAP
28 #ifdef TRACE_HEAP
29 #	define TRACE(x) dprintf x
30 #else
31 #	define TRACE(x) ;
32 #endif
33 
34 
35 #if KERNEL_HEAP_LEAK_CHECK
36 typedef struct heap_leak_check_info_s {
37 	addr_t		caller;
38 	size_t		size;
39 	thread_id	thread;
40 	team_id		team;
41 } heap_leak_check_info;
42 
43 struct caller_info {
44 	addr_t		caller;
45 	uint32		count;
46 	uint32		size;
47 };
48 
49 static const int32 kCallerInfoTableSize = 1024;
50 static caller_info sCallerInfoTable[kCallerInfoTableSize];
51 static int32 sCallerInfoCount = 0;
52 #endif
53 
54 
55 typedef struct heap_page_s heap_page;
56 
57 
58 typedef struct heap_area_s {
59 	area_id			area;
60 
61 	addr_t			base;
62 	size_t			size;
63 
64 	uint32			page_count;
65 	uint32			free_page_count;
66 
67 	heap_page *		free_pages;
68 	heap_page *		page_table;
69 
70 	heap_area_s *	prev;
71 	heap_area_s *	next;
72 	heap_area_s *	all_next;
73 } heap_area;
74 
75 
76 #define MAX_BIN_COUNT	31	// depends on the size of the bin_index field
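// bin_index is a 5 bit field and can therefore hold values 0-31; a page whose
// bin_index equals the heap's bin_count belongs to a raw page allocation
// rather than to one of the bins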
77 
78 typedef struct heap_page_s {
79 	heap_area *		area;
80 	uint16			index;
81 	uint16			bin_index : 5;
82 	uint16			free_count : 10;
83 	uint16			in_use : 1;
84 	heap_page_s *	next;
85 	heap_page_s *	prev;
86 	union {
87 		uint16			empty_index;
88 		uint16			allocation_id; // used for bin == bin_count allocations
89 	};
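	// singly linked list of free slots in this page; each free slot stores
	// the address of the next free slot in its first word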
90 	addr_t *		free_list;
91 } heap_page;
92 
93 
94 typedef struct heap_bin_s {
95 	mutex		lock;
96 	uint32		element_size;
97 	uint16		max_free_count;
98 	heap_page *	page_list; // sorted so that the desired page is always first
99 } heap_bin;
100 
101 
102 struct heap_allocator_s {
103 	rw_lock		area_lock;
104 	mutex		page_lock;
105 
106 	const char *name;
107 	uint32		bin_count;
108 	uint32		page_size;
109 
110 	uint32		total_pages;
111 	uint32		total_free_pages;
112 	uint32		empty_areas;
113 
114 #if KERNEL_HEAP_LEAK_CHECK
115 	addr_t		(*get_caller)();
116 #endif
117 
118 	heap_bin *	bins;
119 	heap_area *	areas; // sorted so that the desired area is always first
120 	heap_area *	all_areas; // all areas including full ones
121 };
122 
123 
124 static const uint32 kAreaAllocationMagic = 'AAMG';
125 typedef struct area_allocation_info_s {
126 	area_id		area;
127 	void *		base;
128 	uint32		magic;
129 	size_t		size;
130 	size_t		allocation_size;
131 	size_t		allocation_alignment;
132 	void *		allocation_base;
133 } area_allocation_info;
134 
135 
136 struct DeferredFreeListEntry : SinglyLinkedListLinkImpl<DeferredFreeListEntry> {
137 };
138 
139 
140 typedef SinglyLinkedList<DeferredFreeListEntry> DeferredFreeList;
141 typedef SinglyLinkedList<DeferredDeletable> DeferredDeletableList;
142 
143 
144 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
145 
146 #define VIP_HEAP_SIZE	(1024 * 1024)
147 
148 // Heap class configuration
149 #define HEAP_CLASS_COUNT 3
150 static const heap_class sHeapClasses[HEAP_CLASS_COUNT] = {
151 	{
152 		"small",					/* name */
153 		50,							/* initial percentage */
154 		B_PAGE_SIZE / 8,			/* max allocation size */
155 		B_PAGE_SIZE,				/* page size */
156 		8,							/* min bin size */
157 		4,							/* bin alignment */
158 		8,							/* min count per page */
159 		16							/* max waste per page */
160 	},
161 	{
162 		"medium",					/* name */
163 		30,							/* initial percentage */
164 		B_PAGE_SIZE * 2,			/* max allocation size */
165 		B_PAGE_SIZE * 8,			/* page size */
166 		B_PAGE_SIZE / 8,			/* min bin size */
167 		32,							/* bin alignment */
168 		4,							/* min count per page */
169 		64							/* max waste per page */
170 	},
171 	{
172 		"large",					/* name */
173 		20,							/* initial percentage */
174 		HEAP_AREA_USE_THRESHOLD,	/* max allocation size */
175 		B_PAGE_SIZE * 16,			/* page size */
176 		B_PAGE_SIZE * 2,			/* min bin size */
177 		128,						/* bin alignment */
178 		1,							/* min count per page */
179 		256							/* max waste per page */
180 	}
181 };
182 
183 
184 static uint32 sHeapCount;
185 static heap_allocator *sHeaps[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
186 static uint32 *sLastGrowRequest[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
187 static uint32 *sLastHandledGrowRequest[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
188 
189 static heap_allocator *sVIPHeap;
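// The grow heap is a dedicated reserve used by the heap grower thread, so
// that creating additional heap areas can itself allocate memory even while
// the regular heaps are running out.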
190 static heap_allocator *sGrowHeap = NULL;
191 static thread_id sHeapGrowThread = -1;
192 static sem_id sHeapGrowSem = -1;
193 static sem_id sHeapGrownNotify = -1;
194 static bool sAddGrowHeap = false;
195 
196 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
197 
198 static DeferredFreeList sDeferredFreeList;
199 static DeferredDeletableList sDeferredDeletableList;
200 static spinlock sDeferredFreeListLock;
201 
202 
203 
204 // #pragma mark - Tracing
205 
206 #if KERNEL_HEAP_TRACING
207 namespace KernelHeapTracing {
208 
209 class Allocate : public AbstractTraceEntry {
210 	public:
211 		Allocate(addr_t address, size_t size)
212 			:	fAddress(address),
213 				fSize(size)
214 		{
215 			Initialized();
216 		}
217 
218 		virtual void AddDump(TraceOutput &out)
219 		{
220 			out.Print("heap allocate: 0x%08lx (%lu bytes)", fAddress, fSize);
221 		}
222 
223 	private:
224 		addr_t	fAddress;
225 		size_t	fSize;
226 };
227 
228 
229 class Reallocate : public AbstractTraceEntry {
230 	public:
231 		Reallocate(addr_t oldAddress, addr_t newAddress, size_t newSize)
232 			:	fOldAddress(oldAddress),
233 				fNewAddress(newAddress),
234 				fNewSize(newSize)
235 		{
236 			Initialized();
237 		}
238 
239 		virtual void AddDump(TraceOutput &out)
240 		{
241 			out.Print("heap reallocate: 0x%08lx -> 0x%08lx (%lu bytes)",
242 				fOldAddress, fNewAddress, fNewSize);
243 		}
244 
245 	private:
246 		addr_t	fOldAddress;
247 		addr_t	fNewAddress;
248 		size_t	fNewSize;
249 };
250 
251 
252 class Free : public AbstractTraceEntry {
253 	public:
254 		Free(addr_t address)
255 			:	fAddress(address)
256 		{
257 			Initialized();
258 		}
259 
260 		virtual void AddDump(TraceOutput &out)
261 		{
262 			out.Print("heap free: 0x%08lx", fAddress);
263 		}
264 
265 	private:
266 		addr_t	fAddress;
267 };
268 
269 
270 } // namespace KernelHeapTracing
271 
272 #	define T(x)	if (!gKernelStartup) new(std::nothrow) KernelHeapTracing::x;
273 #else
274 #	define T(x)	;
275 #endif
276 
277 
278 // #pragma mark - Debug functions
279 
280 
281 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
282 
283 
284 #if KERNEL_HEAP_LEAK_CHECK
285 static addr_t
286 get_caller()
287 {
288 	// Find the first return address outside of the allocator code. Note that
289 	// this makes certain assumptions about how the code for the functions
290 	// ends up in the kernel object.
291 	addr_t returnAddresses[5];
292 	int32 depth = arch_debug_get_stack_trace(returnAddresses, 5, 0, 1,
293 		STACK_TRACE_KERNEL);
294 	for (int32 i = 0; i < depth; i++) {
295 		if (returnAddresses[i] < (addr_t)&get_caller
296 			|| returnAddresses[i] > (addr_t)&malloc_referenced_release) {
297 			return returnAddresses[i];
298 		}
299 	}
300 
301 	return 0;
302 }
303 #endif
304 
305 
306 static void
307 dump_page(heap_page *page)
308 {
309 	uint32 count = 0;
310 	for (addr_t *temp = page->free_list; temp != NULL; temp = (addr_t *)*temp)
311 		count++;
312 
313 	kprintf("\t\tpage %p: bin_index: %u; free_count: %u; empty_index: %u; "
314 		"free_list %p (%lu entr%s)\n", page, page->bin_index, page->free_count,
315 		page->empty_index, page->free_list, count, count == 1 ? "y" : "ies");
316 }
317 
318 
319 static void
320 dump_bin(heap_bin *bin)
321 {
322 	uint32 count = 0;
323 	for (heap_page *page = bin->page_list; page != NULL; page = page->next)
324 		count++;
325 
326 	kprintf("\telement_size: %lu; max_free_count: %u; page_list %p (%lu pages"
327 		");\n", bin->element_size, bin->max_free_count, bin->page_list, count);
328 
329 	for (heap_page *page = bin->page_list; page != NULL; page = page->next)
330 		dump_page(page);
331 }
332 
333 
334 static void
335 dump_bin_list(heap_allocator *heap)
336 {
337 	for (uint32 i = 0; i < heap->bin_count; i++)
338 		dump_bin(&heap->bins[i]);
339 	kprintf("\n");
340 }
341 
342 
343 static void
344 dump_allocator_areas(heap_allocator *heap)
345 {
346 	heap_area *area = heap->all_areas;
347 	while (area) {
348 		kprintf("\tarea %p: area: %ld; base: 0x%08lx; size: %lu; page_count: "
349 			"%lu; free_pages: %p (%lu entr%s)\n", area, area->area, area->base,
350 			area->size, area->page_count, area->free_pages,
351 			area->free_page_count, area->free_page_count == 1 ? "y" : "ies");
352 		area = area->all_next;
353 	}
354 
355 	kprintf("\n");
356 }
357 
358 
359 static void
360 dump_allocator(heap_allocator *heap, bool areas, bool bins)
361 {
362 	kprintf("allocator %p: name: %s; page_size: %lu; bin_count: %lu; pages: "
363 		"%lu; free_pages: %lu; empty_areas: %lu\n", heap, heap->name,
364 		heap->page_size, heap->bin_count, heap->total_pages,
365 		heap->total_free_pages, heap->empty_areas);
366 
367 	if (areas)
368 		dump_allocator_areas(heap);
369 	if (bins)
370 		dump_bin_list(heap);
371 }
372 
373 
374 static int
375 dump_heap_list(int argc, char **argv)
376 {
377 	if (argc == 2 && strcmp(argv[1], "grow") == 0) {
378 		// only dump dedicated grow heap info
379 		kprintf("dedicated grow heap:\n");
380 		dump_allocator(sGrowHeap, true, true);
381 		return 0;
382 	}
383 
384 	bool stats = false;
385 	int i = 1;
386 
387 	if (argc > 1 && strcmp(argv[1], "stats") == 0) {
388 		stats = true;
389 		i++;
390 	}
391 
392 	uint64 heapAddress = 0;
393 	if (i < argc && !evaluate_debug_expression(argv[i], &heapAddress, true)) {
394 		print_debugger_command_usage(argv[0]);
395 		return 0;
396 	}
397 
398 	if (heapAddress == 0) {
399 		// dump default kernel heaps
400 		for (uint32 i = 0; i < sHeapCount; i++)
401 			dump_allocator(sHeaps[i], !stats, !stats);
402 	} else {
403 		// dump specified heap
404 		dump_allocator((heap_allocator*)(addr_t)heapAddress, !stats, !stats);
405 	}
406 
407 	return 0;
408 }
409 
410 
411 #if !KERNEL_HEAP_LEAK_CHECK
412 
413 static int
414 dump_allocations(int argc, char **argv)
415 {
416 	uint64 heapAddress = 0;
417 	bool statsOnly = false;
418 	for (int32 i = 1; i < argc; i++) {
419 		if (strcmp(argv[i], "stats") == 0)
420 			statsOnly = true;
421 		else if (!evaluate_debug_expression(argv[i], &heapAddress, true)) {
422 			print_debugger_command_usage(argv[0]);
423 			return 0;
424 		}
425 	}
426 
427 	size_t totalSize = 0;
428 	uint32 totalCount = 0;
429 	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
430 		heap_allocator *heap = sHeaps[heapIndex];
431 		if (heapAddress != 0)
432 			heap = (heap_allocator *)(addr_t)heapAddress;
433 
434 		// go through all the pages in all the areas
435 		heap_area *area = heap->all_areas;
436 		while (area) {
437 			for (uint32 i = 0; i < area->page_count; i++) {
438 				heap_page *page = &area->page_table[i];
439 				if (!page->in_use)
440 					continue;
441 
442 				addr_t base = area->base + i * heap->page_size;
443 				if (page->bin_index < heap->bin_count) {
444 					// page is used by a small allocation bin
445 					uint32 elementCount = page->empty_index;
446 					size_t elementSize
447 						= heap->bins[page->bin_index].element_size;
448 					for (uint32 j = 0; j < elementCount;
449 							j++, base += elementSize) {
450 						// walk the free list to see if this element is in use
451 						bool elementInUse = true;
452 						for (addr_t *temp = page->free_list; temp != NULL;
453 								temp = (addr_t *)*temp) {
454 							if ((addr_t)temp == base) {
455 								elementInUse = false;
456 								break;
457 							}
458 						}
459 
460 						if (!elementInUse)
461 							continue;
462 
463 						if (!statsOnly) {
464 							kprintf("address: 0x%08lx; size: %lu bytes\n",
465 								base, elementSize);
466 						}
467 
468 						totalSize += elementSize;
469 						totalCount++;
470 					}
471 				} else {
472 					// page is used by a big allocation, find the page count
473 					uint32 pageCount = 1;
474 					while (i + pageCount < area->page_count
475 						&& area->page_table[i + pageCount].in_use
476 						&& area->page_table[i + pageCount].bin_index
477 							== heap->bin_count
478 						&& area->page_table[i + pageCount].allocation_id
479 							== page->allocation_id)
480 						pageCount++;
481 
482 					size_t size = pageCount * heap->page_size;
483 
484 					if (!statsOnly) {
485 						kprintf("address: 0x%08lx; size: %lu bytes\n", base,
486 							size);
487 					}
488 
489 					totalSize += size;
490 					totalCount++;
491 
492 					// skip the allocated pages
493 					i += pageCount - 1;
494 				}
495 			}
496 
497 			area = area->all_next;
498 		}
499 
500 		if (heapAddress != 0)
501 			break;
502 	}
503 
504 	kprintf("total allocations: %lu; total bytes: %lu\n", totalCount, totalSize);
505 	return 0;
506 }
507 
508 #else // !KERNEL_HEAP_LEAK_CHECK
509 
510 static int
511 dump_allocations(int argc, char **argv)
512 {
513 	team_id team = -1;
514 	thread_id thread = -1;
515 	addr_t caller = 0;
516 	addr_t address = 0;
517 	bool statsOnly = false;
518 
519 	for (int32 i = 1; i < argc; i++) {
520 		if (strcmp(argv[i], "team") == 0)
521 			team = parse_expression(argv[++i]);
522 		else if (strcmp(argv[i], "thread") == 0)
523 			thread = parse_expression(argv[++i]);
524 		else if (strcmp(argv[i], "caller") == 0)
525 			caller = parse_expression(argv[++i]);
526 		else if (strcmp(argv[i], "address") == 0)
527 			address = parse_expression(argv[++i]);
528 		else if (strcmp(argv[i], "stats") == 0)
529 			statsOnly = true;
530 		else {
531 			print_debugger_command_usage(argv[0]);
532 			return 0;
533 		}
534 	}
535 
536 	size_t totalSize = 0;
537 	uint32 totalCount = 0;
538 	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
539 		heap_allocator *heap = sHeaps[heapIndex];
540 
541 		// go through all the pages in all the areas
542 		heap_area *area = heap->all_areas;
543 		while (area) {
544 			heap_leak_check_info *info = NULL;
545 			for (uint32 i = 0; i < area->page_count; i++) {
546 				heap_page *page = &area->page_table[i];
547 				if (!page->in_use)
548 					continue;
549 
550 				addr_t base = area->base + i * heap->page_size;
551 				if (page->bin_index < heap->bin_count) {
552 					// page is used by a small allocation bin
553 					uint32 elementCount = page->empty_index;
554 					size_t elementSize
555 						= heap->bins[page->bin_index].element_size;
556 					for (uint32 j = 0; j < elementCount;
557 							j++, base += elementSize) {
558 						// walk the free list to see if this element is in use
559 						bool elementInUse = true;
560 						for (addr_t *temp = page->free_list; temp != NULL;
561 								temp = (addr_t *)*temp) {
562 							if ((addr_t)temp == base) {
563 								elementInUse = false;
564 								break;
565 							}
566 						}
567 
568 						if (!elementInUse)
569 							continue;
570 
571 						info = (heap_leak_check_info *)(base + elementSize
572 							- sizeof(heap_leak_check_info));
573 
574 						if ((team == -1 || info->team == team)
575 							&& (thread == -1 || info->thread == thread)
576 							&& (caller == 0 || info->caller == caller)
577 							&& (address == 0 || base == address)) {
578 							// interesting...
579 							if (!statsOnly) {
580 								kprintf("team: % 6ld; thread: % 6ld; "
581 									"address: 0x%08lx; size: %lu bytes; "
582 									"caller: %#lx\n", info->team, info->thread,
583 									base, info->size, info->caller);
584 							}
585 
586 							totalSize += info->size;
587 							totalCount++;
588 						}
589 					}
590 				} else {
591 					// page is used by a big allocation, find the page count
592 					uint32 pageCount = 1;
593 					while (i + pageCount < area->page_count
594 						&& area->page_table[i + pageCount].in_use
595 						&& area->page_table[i + pageCount].bin_index
596 							== heap->bin_count
597 						&& area->page_table[i + pageCount].allocation_id
598 							== page->allocation_id)
599 						pageCount++;
600 
601 					info = (heap_leak_check_info *)(base + pageCount
602 						* heap->page_size - sizeof(heap_leak_check_info));
603 
604 					if ((team == -1 || info->team == team)
605 						&& (thread == -1 || info->thread == thread)
606 						&& (caller == 0 || info->caller == caller)
607 						&& (address == 0 || base == address)) {
608 						// interesting...
609 						if (!statsOnly) {
610 							kprintf("team: % 6ld; thread: % 6ld;"
611 								" address: 0x%08lx; size: %lu bytes;"
612 								" caller: %#lx\n", info->team, info->thread,
613 								base, info->size, info->caller);
614 						}
615 
616 						totalSize += info->size;
617 						totalCount++;
618 					}
619 
620 					// skip the allocated pages
621 					i += pageCount - 1;
622 				}
623 			}
624 
625 			area = area->all_next;
626 		}
627 	}
628 
629 	kprintf("total allocations: %lu; total bytes: %lu\n", totalCount,
630 		totalSize);
631 	return 0;
632 }
633 
634 
635 static caller_info*
636 get_caller_info(addr_t caller)
637 {
638 	// find the caller info
639 	for (int32 i = 0; i < sCallerInfoCount; i++) {
640 		if (caller == sCallerInfoTable[i].caller)
641 			return &sCallerInfoTable[i];
642 	}
643 
644 	// not found, add a new entry, if there are free slots
645 	if (sCallerInfoCount >= kCallerInfoTableSize)
646 		return NULL;
647 
648 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
649 	info->caller = caller;
650 	info->count = 0;
651 	info->size = 0;
652 
653 	return info;
654 }
655 
656 
657 static int
658 caller_info_compare_size(const void* _a, const void* _b)
659 {
660 	const caller_info* a = (const caller_info*)_a;
661 	const caller_info* b = (const caller_info*)_b;
662 	return (int)(b->size - a->size);
663 }
664 
665 
666 static int
667 caller_info_compare_count(const void* _a, const void* _b)
668 {
669 	const caller_info* a = (const caller_info*)_a;
670 	const caller_info* b = (const caller_info*)_b;
671 	return (int)(b->count - a->count);
672 }
673 
674 
675 static bool
676 analyze_allocation_callers(heap_allocator *heap)
677 {
678 	// go through all the pages in all the areas
679 	heap_area *area = heap->all_areas;
680 	while (area) {
681 		heap_leak_check_info *info = NULL;
682 		for (uint32 i = 0; i < area->page_count; i++) {
683 			heap_page *page = &area->page_table[i];
684 			if (!page->in_use)
685 				continue;
686 
687 			addr_t base = area->base + i * heap->page_size;
688 			if (page->bin_index < heap->bin_count) {
689 				// page is used by a small allocation bin
690 				uint32 elementCount = page->empty_index;
691 				size_t elementSize = heap->bins[page->bin_index].element_size;
692 				for (uint32 j = 0; j < elementCount; j++, base += elementSize) {
693 					// walk the free list to see if this element is in use
694 					bool elementInUse = true;
695 					for (addr_t *temp = page->free_list; temp != NULL;
696 						temp = (addr_t *)*temp) {
697 						if ((addr_t)temp == base) {
698 							elementInUse = false;
699 							break;
700 						}
701 					}
702 
703 					if (!elementInUse)
704 						continue;
705 
706 					info = (heap_leak_check_info *)(base + elementSize
707 						- sizeof(heap_leak_check_info));
708 
709 					caller_info *callerInfo = get_caller_info(info->caller);
710 					if (callerInfo == NULL) {
711 						kprintf("out of space for caller infos\n");
712 						return false;
713 					}
714 
715 					callerInfo->count++;
716 					callerInfo->size += info->size;
717 				}
718 			} else {
719 				// page is used by a big allocation, find the page count
720 				uint32 pageCount = 1;
721 				while (i + pageCount < area->page_count
722 					&& area->page_table[i + pageCount].in_use
723 					&& area->page_table[i + pageCount].bin_index
724 						== heap->bin_count
725 					&& area->page_table[i + pageCount].allocation_id
726 						== page->allocation_id) {
727 					pageCount++;
728 				}
729 
730 				info = (heap_leak_check_info *)(base + pageCount
731 					* heap->page_size - sizeof(heap_leak_check_info));
732 
733 				caller_info *callerInfo = get_caller_info(info->caller);
734 				if (callerInfo == NULL) {
735 					kprintf("out of space for caller infos\n");
736 					return false;
737 				}
738 
739 				callerInfo->count++;
740 				callerInfo->size += info->size;
741 
742 				// skip the allocated pages
743 				i += pageCount - 1;
744 			}
745 		}
746 
747 		area = area->all_next;
748 	}
749 
750 	return true;
751 }
752 
753 
754 static int
755 dump_allocations_per_caller(int argc, char **argv)
756 {
757 	bool sortBySize = true;
758 	heap_allocator *heap = NULL;
759 
760 	for (int32 i = 1; i < argc; i++) {
761 		if (strcmp(argv[i], "-c") == 0) {
762 			sortBySize = false;
763 		} else if (strcmp(argv[i], "-h") == 0) {
764 			uint64 heapAddress;
765 			if (++i >= argc
766 				|| !evaluate_debug_expression(argv[i], &heapAddress, true)) {
767 				print_debugger_command_usage(argv[0]);
768 				return 0;
769 			}
770 
771 			heap = (heap_allocator*)(addr_t)heapAddress;
772 		} else {
773 			print_debugger_command_usage(argv[0]);
774 			return 0;
775 		}
776 	}
777 
778 	sCallerInfoCount = 0;
779 
780 	if (heap != NULL) {
781 		if (!analyze_allocation_callers(heap))
782 			return 0;
783 	} else {
784 		for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
785 			if (!analyze_allocation_callers(sHeaps[heapIndex]))
786 				return 0;
787 		}
788 	}
789 
790 	// sort the array
791 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
792 		sortBySize ? &caller_info_compare_size : &caller_info_compare_count);
793 
794 	kprintf("%ld different callers, sorted by %s...\n\n", sCallerInfoCount,
795 		sortBySize ? "size" : "count");
796 
797 	kprintf("     count        size      caller\n");
798 	kprintf("----------------------------------\n");
799 	for (int32 i = 0; i < sCallerInfoCount; i++) {
800 		caller_info& info = sCallerInfoTable[i];
801 		kprintf("%10ld  %10ld  %#08lx", info.count, info.size, info.caller);
802 
803 		const char *symbol;
804 		const char *imageName;
805 		bool exactMatch;
806 		addr_t baseAddress;
807 
808 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
809 				&imageName, &exactMatch) == B_OK) {
810 			kprintf("  %s + 0x%lx (%s)%s\n", symbol,
811 				info.caller - baseAddress, imageName,
812 				exactMatch ? "" : " (nearest)");
813 		} else
814 			kprintf("\n");
815 	}
816 
817 	return 0;
818 }
819 
820 #endif // KERNEL_HEAP_LEAK_CHECK
821 
822 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
823 
824 
825 #if PARANOID_HEAP_VALIDATION
826 static void
827 heap_validate_heap(heap_allocator *heap)
828 {
829 	ReadLocker areaReadLocker(heap->area_lock);
830 	for (uint32 i = 0; i < heap->bin_count; i++)
831 		mutex_lock(&heap->bins[i].lock);
832 	MutexLocker pageLocker(heap->page_lock);
833 
834 	uint32 totalPageCount = 0;
835 	uint32 totalFreePageCount = 0;
836 	heap_area *area = heap->all_areas;
837 	while (area != NULL) {
838 		// validate the free pages list
839 		uint32 freePageCount = 0;
840 		heap_page *lastPage = NULL;
841 		heap_page *page = area->free_pages;
842 		while (page) {
843 			if ((addr_t)page < (addr_t)&area->page_table[0]
844 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
845 				panic("free page is not part of the page table\n");
846 
847 			if (page->index >= area->page_count)
848 				panic("free page has invalid index\n");
849 
850 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
851 				panic("free page index does not lead to target page\n");
852 
853 			if (page->prev != lastPage)
854 				panic("free page entry has invalid prev link\n");
855 
856 			if (page->in_use)
857 				panic("free page marked as in use\n");
858 
859 			lastPage = page;
860 			page = page->next;
861 			freePageCount++;
862 		}
863 
864 		totalPageCount += freePageCount;
865 		totalFreePageCount += freePageCount;
866 		if (area->free_page_count != freePageCount)
867 			panic("free page count doesn't match free page list\n");
868 
869 		// validate the page table
870 		uint32 usedPageCount = 0;
871 		for (uint32 i = 0; i < area->page_count; i++) {
872 			if (area->page_table[i].in_use)
873 				usedPageCount++;
874 		}
875 
876 		totalPageCount += usedPageCount;
877 		if (freePageCount + usedPageCount != area->page_count) {
878 			panic("free pages and used pages do not add up (%lu + %lu != %lu)\n",
879 				freePageCount, usedPageCount, area->page_count);
880 		}
881 
882 		area = area->all_next;
883 	}
884 
885 	// validate the areas
886 	area = heap->areas;
887 	heap_area *lastArea = NULL;
888 	uint32 lastFreeCount = 0;
889 	while (area != NULL) {
890 		if (area->free_page_count < lastFreeCount)
891 			panic("size ordering of area list broken\n");
892 
893 		if (area->prev != lastArea)
894 			panic("area list entry has invalid prev link\n");
895 
896 		lastArea = area;
897 		lastFreeCount = area->free_page_count;
898 		area = area->next;
899 	}
900 
901 	lastArea = NULL;
902 	area = heap->all_areas;
903 	while (area != NULL) {
904 		if (lastArea != NULL && lastArea->base < area->base)
905 			panic("base ordering of all_areas list broken\n");
906 
907 		lastArea = area;
908 		area = area->all_next;
909 	}
910 
911 	// validate the bins
912 	for (uint32 i = 0; i < heap->bin_count; i++) {
913 		heap_bin *bin = &heap->bins[i];
914 		heap_page *lastPage = NULL;
915 		heap_page *page = bin->page_list;
916 		lastFreeCount = 0;
917 		while (page) {
918 			area = heap->all_areas;
919 			while (area) {
920 				if (area == page->area)
921 					break;
922 				area = area->all_next;
923 			}
924 
925 			if (area == NULL) {
926 				panic("page area not present in area list\n");
927 				page = page->next;
928 				continue;
929 			}
930 
931 			if ((addr_t)page < (addr_t)&area->page_table[0]
932 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
933 				panic("used page is not part of the page table\n");
934 
935 			if (page->index >= area->page_count)
936 				panic("used page has invalid index\n");
937 
938 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
939 				panic("used page index does not lead to target page\n");
940 
941 			if (page->prev != lastPage) {
942 				panic("used page entry has invalid prev link (%p vs %p bin "
943 					"%lu)\n", page->prev, lastPage, i);
944 			}
945 
946 			if (!page->in_use)
947 				panic("used page marked as not in use\n");
948 
949 			if (page->bin_index != i) {
950 				panic("used page with bin index %u in page list of bin %lu\n",
951 					page->bin_index, i);
952 			}
953 
954 			if (page->free_count < lastFreeCount)
955 				panic("ordering of bin page list broken\n");
956 
957 			// validate the free list
958 			uint32 freeSlotsCount = 0;
959 			addr_t *element = page->free_list;
960 			addr_t pageBase = area->base + page->index * heap->page_size;
961 			while (element) {
962 				if ((addr_t)element < pageBase
963 					|| (addr_t)element >= pageBase + heap->page_size)
964 					panic("free list entry out of page range\n");
965 
966 				if (((addr_t)element - pageBase) % bin->element_size != 0)
967 					panic("free list entry not on an element boundary\n");
968 
969 				element = (addr_t *)*element;
970 				freeSlotsCount++;
971 			}
972 
973 			uint32 slotCount = bin->max_free_count;
974 			if (page->empty_index > slotCount) {
975 				panic("empty index beyond slot count (%u with %lu slots)\n",
976 					page->empty_index, slotCount);
977 			}
978 
979 			freeSlotsCount += (slotCount - page->empty_index);
980 			if (freeSlotsCount > slotCount)
981 				panic("more free slots than fit into the page\n");
982 
983 			lastPage = page;
984 			lastFreeCount = page->free_count;
985 			page = page->next;
986 		}
987 	}
988 
989 	pageLocker.Unlock();
990 	for (uint32 i = 0; i < heap->bin_count; i++)
991 		mutex_unlock(&heap->bins[i].lock);
992 	areaReadLocker.Unlock();
993 }
994 #endif // PARANOID_HEAP_VALIDATION
995 
996 
997 // #pragma mark - Heap functions
998 
999 
1000 void
1001 heap_add_area(heap_allocator *heap, area_id areaID, addr_t base, size_t size)
1002 {
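	// the area is laid out as: heap_area header, page table, then the
	// (page aligned) pages that are actually handed out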
1003 	heap_area *area = (heap_area *)base;
1004 	area->area = areaID;
1005 
1006 	base += sizeof(heap_area);
1007 	size -= sizeof(heap_area);
1008 
1009 	uint32 pageCount = size / heap->page_size;
1010 	size_t pageTableSize = pageCount * sizeof(heap_page);
1011 	area->page_table = (heap_page *)base;
1012 	base += pageTableSize;
1013 	size -= pageTableSize;
1014 
1015 	// the rest is now actually usable memory (rounded to whole pages)
1016 	area->base = ROUNDUP(base, B_PAGE_SIZE);
1017 	area->size = size & ~(B_PAGE_SIZE - 1);
1018 
1019 	// now we know the real page count
1020 	pageCount = area->size / heap->page_size;
1021 	area->page_count = pageCount;
1022 
1023 	// zero out the page table and fill in page indexes
1024 	memset((void *)area->page_table, 0, pageTableSize);
1025 	for (uint32 i = 0; i < pageCount; i++) {
1026 		area->page_table[i].area = area;
1027 		area->page_table[i].index = i;
1028 	}
1029 
1030 	// link all pages into the free pages list
1031 	for (uint32 i = 1; i < pageCount; i++) {
1032 		area->page_table[i - 1].next = &area->page_table[i];
1033 		area->page_table[i].prev = &area->page_table[i - 1];
1034 	}
1035 	area->free_pages = &area->page_table[0];
1036 	area->free_page_count = pageCount;
1037 	area->page_table[0].prev = NULL;
1038 	area->next = NULL;
1039 
1040 	WriteLocker areaWriteLocker(heap->area_lock);
1041 	MutexLocker pageLocker(heap->page_lock);
1042 	if (heap->areas == NULL) {
1043 		// it's the only (empty) area in that heap
1044 		area->prev = NULL;
1045 		heap->areas = area;
1046 	} else {
1047 		// link in this area as the last one as it is completely empty
1048 		heap_area *lastArea = heap->areas;
1049 		while (lastArea->next != NULL)
1050 			lastArea = lastArea->next;
1051 
1052 		lastArea->next = area;
1053 		area->prev = lastArea;
1054 	}
1055 
1056 	// insert this area into the all_areas list, ordered by decreasing base
1057 	if (heap->all_areas == NULL || heap->all_areas->base < area->base) {
1058 		area->all_next = heap->all_areas;
1059 		heap->all_areas = area;
1060 	} else {
1061 		heap_area *insert = heap->all_areas;
1062 		while (insert->all_next && insert->all_next->base > area->base)
1063 			insert = insert->all_next;
1064 
1065 		area->all_next = insert->all_next;
1066 		insert->all_next = area;
1067 	}
1068 
1069 	heap->total_pages += area->page_count;
1070 	heap->total_free_pages += area->free_page_count;
1071 
1072 	if (areaID >= 0) {
1073 		// this area can be deleted later on and is still empty - the empty
1074 		// count will be decremented as soon as it is used for the first time
1075 		heap->empty_areas++;
1076 	}
1077 
1078 	pageLocker.Unlock();
1079 	areaWriteLocker.Unlock();
1080 
1081 	dprintf("heap_add_area: area %ld added to %s heap %p - usable range 0x%08lx "
1082 		"- 0x%08lx\n", area->area, heap->name, heap, area->base,
1083 		area->base + area->size);
1084 }
1085 
1086 
1087 static status_t
1088 heap_remove_area(heap_allocator *heap, heap_area *area)
1089 {
1090 	if (area->free_page_count != area->page_count) {
1091 		panic("tried removing heap area that has still pages in use");
1092 		return B_ERROR;
1093 	}
1094 
1095 	if (area->prev == NULL && area->next == NULL) {
1096 		panic("tried removing the last non-full heap area");
1097 		return B_ERROR;
1098 	}
1099 
1100 	if (heap->areas == area)
1101 		heap->areas = area->next;
1102 	if (area->prev != NULL)
1103 		area->prev->next = area->next;
1104 	if (area->next != NULL)
1105 		area->next->prev = area->prev;
1106 
1107 	if (heap->all_areas == area)
1108 		heap->all_areas = area->all_next;
1109 	else {
1110 		heap_area *previous = heap->all_areas;
1111 		while (previous) {
1112 			if (previous->all_next == area) {
1113 				previous->all_next = area->all_next;
1114 				break;
1115 			}
1116 
1117 			previous = previous->all_next;
1118 		}
1119 
1120 		if (previous == NULL)
1121 			panic("removing heap area that is not in all list");
1122 	}
1123 
1124 	heap->total_pages -= area->page_count;
1125 	heap->total_free_pages -= area->free_page_count;
1126 
1127 	dprintf("heap_remove_area: area %ld with range 0x%08lx - 0x%08lx removed "
1128 		"from %s heap %p\n", area->area, area->base, area->base + area->size,
1129 		heap->name, heap);
1130 
1131 	return B_OK;
1132 }
1133 
1134 
1135 heap_allocator *
1136 heap_create_allocator(const char *name, addr_t base, size_t size,
1137 	const heap_class *heapClass, bool allocateOnHeap)
1138 {
1139 	heap_allocator *heap;
1140 	if (allocateOnHeap) {
1141 		// allocate seperately on the heap
1142 		// allocate separately on the heap
1143 			+ sizeof(heap_bin) * MAX_BIN_COUNT);
1144 	} else {
1145 		// use up the first part of the area
1146 		heap = (heap_allocator *)base;
1147 		base += sizeof(heap_allocator);
1148 		size -= sizeof(heap_allocator);
1149 	}
1150 
1151 	heap->name = name;
1152 	heap->page_size = heapClass->page_size;
1153 	heap->total_pages = heap->total_free_pages = heap->empty_areas = 0;
1154 	heap->areas = heap->all_areas = NULL;
1155 	heap->bins = (heap_bin *)((addr_t)heap + sizeof(heap_allocator));
1156 
1157 #if KERNEL_HEAP_LEAK_CHECK
1158 	heap->get_caller = &get_caller;
1159 #endif
1160 
1161 	heap->bin_count = 0;
1162 	size_t binSize = 0, lastSize = 0;
1163 	uint32 count = heap->page_size / heapClass->min_bin_size;
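	// generate the bins: for each element count from the maximum down to
	// min_count_per_page, the element size is page_size / count rounded down
	// to bin_alignment; a bin is only added if it is a new size and wastes at
	// most max_waste_per_page bytes of the page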
1164 	for (; count >= heapClass->min_count_per_page; count--, lastSize = binSize) {
1165 		if (heap->bin_count >= MAX_BIN_COUNT)
1166 			panic("heap configuration invalid - max bin count reached\n");
1167 
1168 		binSize = (heap->page_size / count) & ~(heapClass->bin_alignment - 1);
1169 		if (binSize == lastSize)
1170 			continue;
1171 		if (heap->page_size - count * binSize > heapClass->max_waste_per_page)
1172 			continue;
1173 
1174 		heap_bin *bin = &heap->bins[heap->bin_count];
1175 		mutex_init(&bin->lock, "heap bin lock");
1176 		bin->element_size = binSize;
1177 		bin->max_free_count = heap->page_size / binSize;
1178 		bin->page_list = NULL;
1179 		heap->bin_count++;
1180 	}
1181 
1182 	if (!allocateOnHeap) {
1183 		base += heap->bin_count * sizeof(heap_bin);
1184 		size -= heap->bin_count * sizeof(heap_bin);
1185 	}
1186 
1187 	rw_lock_init(&heap->area_lock, "heap area rw lock");
1188 	mutex_init(&heap->page_lock, "heap page lock");
1189 
1190 	heap_add_area(heap, -1, base, size);
1191 	return heap;
1192 }
1193 
1194 
1195 static inline void
1196 heap_free_pages_added(heap_allocator *heap, heap_area *area, uint32 pageCount)
1197 {
1198 	area->free_page_count += pageCount;
1199 	heap->total_free_pages += pageCount;
1200 
1201 	if (area->free_page_count == pageCount) {
1202 		// we need to add ourselves to the area list of the heap
1203 		area->prev = NULL;
1204 		area->next = heap->areas;
1205 		if (area->next)
1206 			area->next->prev = area;
1207 		heap->areas = area;
1208 	} else {
1209 		// we might need to move back in the area list
1210 		if (area->next && area->next->free_page_count < area->free_page_count) {
1211 			// move ourselves so the list stays ordered
1212 			heap_area *insert = area->next;
1213 			while (insert->next
1214 				&& insert->next->free_page_count < area->free_page_count)
1215 				insert = insert->next;
1216 
1217 			if (area->prev)
1218 				area->prev->next = area->next;
1219 			if (area->next)
1220 				area->next->prev = area->prev;
1221 			if (heap->areas == area)
1222 				heap->areas = area->next;
1223 
1224 			area->prev = insert;
1225 			area->next = insert->next;
1226 			if (area->next)
1227 				area->next->prev = area;
1228 			insert->next = area;
1229 		}
1230 	}
1231 
1232 	if (area->free_page_count == area->page_count && area->area >= 0)
1233 		heap->empty_areas++;
1234 }
1235 
1236 
1237 static inline void
1238 heap_free_pages_removed(heap_allocator *heap, heap_area *area, uint32 pageCount)
1239 {
1240 	if (area->free_page_count == area->page_count && area->area >= 0) {
1241 		// this area was completely empty
1242 		heap->empty_areas--;
1243 	}
1244 
1245 	area->free_page_count -= pageCount;
1246 	heap->total_free_pages -= pageCount;
1247 
1248 	if (area->free_page_count == 0) {
1249 		// the area is now full so we remove it from the area list
1250 		if (area->prev)
1251 			area->prev->next = area->next;
1252 		if (area->next)
1253 			area->next->prev = area->prev;
1254 		if (heap->areas == area)
1255 			heap->areas = area->next;
1256 		area->next = area->prev = NULL;
1257 	} else {
1258 		// we might need to move forward in the area list
1259 		if (area->prev && area->prev->free_page_count > area->free_page_count) {
1260 			// move ourselves so the list stays ordered
1261 			heap_area *insert = area->prev;
1262 			while (insert->prev
1263 				&& insert->prev->free_page_count > area->free_page_count)
1264 				insert = insert->prev;
1265 
1266 			if (area->prev)
1267 				area->prev->next = area->next;
1268 			if (area->next)
1269 				area->next->prev = area->prev;
1270 
1271 			area->prev = insert->prev;
1272 			area->next = insert;
1273 			if (area->prev)
1274 				area->prev->next = area;
1275 			if (heap->areas == insert)
1276 				heap->areas = area;
1277 			insert->prev = area;
1278 		}
1279 	}
1280 }
1281 
1282 
1283 static inline void
1284 heap_link_page(heap_page *page, heap_page **list)
1285 {
1286 	page->prev = NULL;
1287 	page->next = *list;
1288 	if (page->next)
1289 		page->next->prev = page;
1290 	*list = page;
1291 }
1292 
1293 
1294 static inline void
1295 heap_unlink_page(heap_page *page, heap_page **list)
1296 {
1297 	if (page->prev)
1298 		page->prev->next = page->next;
1299 	if (page->next)
1300 		page->next->prev = page->prev;
1301 	if (list && *list == page) {
1302 		*list = page->next;
1303 		if (page->next)
1304 			page->next->prev = NULL;
1305 	}
1306 }
1307 
1308 
1309 static heap_page *
1310 heap_allocate_contiguous_pages(heap_allocator *heap, uint32 pageCount,
1311 	size_t alignment)
1312 {
1313 	MutexLocker pageLocker(heap->page_lock);
1314 	heap_area *area = heap->areas;
1315 	while (area) {
1316 		if (area->free_page_count < pageCount) {
1317 			area = area->next;
1318 			continue;
1319 		}
1320 
1321 		uint32 step = 1;
1322 		uint32 firstValid = 0;
1323 		const uint32 lastValid = area->page_count - pageCount + 1;
1324 
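		// with an alignment larger than the page size, an allocation can only
		// start at every (alignment / page_size)th page, beginning at the
		// first page whose base address satisfies the alignment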
1325 		if (alignment > heap->page_size) {
1326 			firstValid = (ROUNDUP(area->base, alignment) - area->base)
1327 				/ heap->page_size;
1328 			step = alignment / heap->page_size;
1329 		}
1330 
1331 		int32 first = -1;
1332 		for (uint32 i = firstValid; i < lastValid; i += step) {
1333 			if (area->page_table[i].in_use)
1334 				continue;
1335 
1336 			first = i;
1337 
1338 			for (uint32 j = 1; j < pageCount; j++) {
1339 				if (area->page_table[i + j].in_use) {
1340 					first = -1;
1341 					i += j / step * step;
1342 					break;
1343 				}
1344 			}
1345 
1346 			if (first >= 0)
1347 				break;
1348 		}
1349 
1350 		if (first < 0) {
1351 			area = area->next;
1352 			continue;
1353 		}
1354 
1355 		for (uint32 i = first; i < first + pageCount; i++) {
1356 			heap_page *page = &area->page_table[i];
1357 			page->in_use = 1;
1358 			page->bin_index = heap->bin_count;
1359 
1360 			heap_unlink_page(page, &area->free_pages);
1361 
1362 			page->next = page->prev = NULL;
1363 			page->free_list = NULL;
1364 			page->allocation_id = (uint16)first;
1365 		}
1366 
1367 		heap_free_pages_removed(heap, area, pageCount);
1368 		return &area->page_table[first];
1369 	}
1370 
1371 	return NULL;
1372 }
1373 
1374 
1375 #if KERNEL_HEAP_LEAK_CHECK
1376 static void
1377 heap_add_leak_check_info(heap_allocator *heap, addr_t address, size_t allocated,
1378 	size_t size)
1379 {
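	// the leak check info is stored at the very end of the allocated space,
	// so the address returned to the caller stays untouched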
1380 	heap_leak_check_info *info = (heap_leak_check_info *)(address + allocated
1381 		- sizeof(heap_leak_check_info));
1382 	info->size = size - sizeof(heap_leak_check_info);
1383 	info->thread = (gKernelStartup ? 0 : thread_get_current_thread_id());
1384 	info->team = (gKernelStartup ? 0 : team_get_current_team_id());
1385 	info->caller = heap->get_caller();
1386 }
1387 #endif
1388 
1389 
1390 static void *
1391 heap_raw_alloc(heap_allocator *heap, size_t size, size_t alignment)
1392 {
1393 	TRACE(("heap %p: allocate %lu bytes from raw pages with alignment %lu\n",
1394 		heap, size, alignment));
1395 
1396 	uint32 pageCount = (size + heap->page_size - 1) / heap->page_size;
1397 	heap_page *firstPage = heap_allocate_contiguous_pages(heap, pageCount,
1398 		alignment);
1399 	if (firstPage == NULL) {
1400 		TRACE(("heap %p: found no contiguous pages to allocate %ld bytes\n",
1401 			heap, size));
1402 		return NULL;
1403 	}
1404 
1405 	addr_t address = firstPage->area->base + firstPage->index * heap->page_size;
1406 #if KERNEL_HEAP_LEAK_CHECK
1407 	heap_add_leak_check_info(heap, address, pageCount * heap->page_size, size);
1408 #endif
1409 	return (void *)address;
1410 }
1411 
1412 
1413 static void *
1414 heap_allocate_from_bin(heap_allocator *heap, uint32 binIndex, size_t size)
1415 {
1416 	heap_bin *bin = &heap->bins[binIndex];
1417 	TRACE(("heap %p: allocate %lu bytes from bin %lu with element_size %lu\n",
1418 		heap, size, binIndex, bin->element_size));
1419 
1420 	MutexLocker binLocker(bin->lock);
1421 	heap_page *page = bin->page_list;
1422 	if (page == NULL) {
1423 		MutexLocker pageLocker(heap->page_lock);
1424 		heap_area *area = heap->areas;
1425 		if (area == NULL) {
1426 			TRACE(("heap %p: no free pages to allocate %lu bytes\n", heap,
1427 				size));
1428 			return NULL;
1429 		}
1430 
1431 		// by design there are only areas in the list that still have
1432 		// free pages available
1433 		page = area->free_pages;
1434 		area->free_pages = page->next;
1435 		if (page->next)
1436 			page->next->prev = NULL;
1437 
1438 		heap_free_pages_removed(heap, area, 1);
1439 
1440 		if (page->in_use)
1441 			panic("got an in use page %p from the free pages list\n", page);
1442 		page->in_use = 1;
1443 
1444 		pageLocker.Unlock();
1445 
1446 		page->bin_index = binIndex;
1447 		page->free_count = bin->max_free_count;
1448 		page->empty_index = 0;
1449 		page->free_list = NULL;
1450 		page->next = page->prev = NULL;
1451 		bin->page_list = page;
1452 	}
1453 
1454 	// we have a page where we have a free slot
1455 	void *address = NULL;
1456 	if (page->free_list) {
1457 		// there's a previously freed entry we can use
1458 		address = page->free_list;
1459 		page->free_list = (addr_t *)*page->free_list;
1460 	} else {
1461 		// the page hasn't been fully allocated so use the next empty_index
1462 		address = (void *)(page->area->base + page->index * heap->page_size
1463 			+ page->empty_index * bin->element_size);
1464 		page->empty_index++;
1465 	}
1466 
1467 	page->free_count--;
1468 	if (page->free_count == 0) {
1469 		// the page is now full so we remove it from the page_list
1470 		bin->page_list = page->next;
1471 		if (page->next)
1472 			page->next->prev = NULL;
1473 		page->next = page->prev = NULL;
1474 	}
1475 
1476 #if KERNEL_HEAP_LEAK_CHECK
1477 	binLocker.Unlock();
1478 	heap_add_leak_check_info(heap, (addr_t)address, bin->element_size, size);
1479 #endif
1480 	return address;
1481 }
1482 
1483 
1484 static bool
1485 is_valid_alignment(size_t number)
1486 {
1487 	// this cryptic line accepts zero and all powers of two
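	// -number (written as ~number + 1) keeps the lowest set bit and
	// complements every bit above it, while (number << 1) - 1 sets every bit
	// up to and including that lowest set bit; only for zero and powers of
	// two does the OR of the two cover all bits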
1488 	return ((~number + 1) | ((number << 1) - 1)) == ~0UL;
1489 }
1490 
1491 
1492 inline bool
1493 heap_should_grow(heap_allocator *heap)
1494 {
1495 	// suggest growing if there is less than 20% of a grow size available
1496 	return heap->total_free_pages * heap->page_size < HEAP_GROW_SIZE / 5;
1497 }
1498 
1499 
1500 void *
1501 heap_memalign(heap_allocator *heap, size_t alignment, size_t size)
1502 {
1503 	TRACE(("memalign(alignment = %lu, size = %lu)\n", alignment, size));
1504 
1505 #if DEBUG
1506 	if (!is_valid_alignment(alignment))
1507 		panic("memalign() with an alignment which is not a power of 2\n");
1508 #endif
1509 
1510 #if KERNEL_HEAP_LEAK_CHECK
1511 	size += sizeof(heap_leak_check_info);
1512 #endif
1513 
1514 	void *address = NULL;
1515 	if (alignment < B_PAGE_SIZE) {
1516 		if (alignment != 0) {
1517 			// TODO: The alignment is done by ensuring that the element size
1518 			// of the target bin is aligned with the requested alignment. This
1519 			// has the problem that it wastes space because a better (smaller)
1520 			// bin could possibly be selected. We should pick the best bin and
1521 			// check if there is an aligned block in the free list or if a new
1522 			// (page aligned) page has to be allocated anyway.
1523 			size = ROUNDUP(size, alignment);
1524 			for (uint32 i = 0; i < heap->bin_count; i++) {
1525 				if (size <= heap->bins[i].element_size
1526 					&& is_valid_alignment(heap->bins[i].element_size)) {
1527 					address = heap_allocate_from_bin(heap, i, size);
1528 					break;
1529 				}
1530 			}
1531 		} else {
1532 			for (uint32 i = 0; i < heap->bin_count; i++) {
1533 				if (size <= heap->bins[i].element_size) {
1534 					address = heap_allocate_from_bin(heap, i, size);
1535 					break;
1536 				}
1537 			}
1538 		}
1539 	}
1540 
1541 	if (address == NULL)
1542 		address = heap_raw_alloc(heap, size, alignment);
1543 
1544 #if KERNEL_HEAP_LEAK_CHECK
1545 	size -= sizeof(heap_leak_check_info);
1546 #endif
1547 
1548 	TRACE(("memalign(): asked to allocate %lu bytes, returning pointer %p\n",
1549 		size, address));
1550 
1551 	T(Allocate((addr_t)address, size));
1552 	if (address == NULL)
1553 		return address;
1554 
1555 #if PARANOID_KERNEL_MALLOC
1556 	memset(address, 0xcc, size);
1557 #endif
1558 
1559 #if PARANOID_KERNEL_FREE
1560 	// make sure 0xdeadbeef is cleared if we do not overwrite the memory
1561 	// and the user does not clear it
1562 	if (((uint32 *)address)[1] == 0xdeadbeef)
1563 		((uint32 *)address)[1] = 0xcccccccc;
1564 #endif
1565 
1566 	return address;
1567 }
1568 
1569 
1570 status_t
1571 heap_free(heap_allocator *heap, void *address)
1572 {
1573 	if (address == NULL)
1574 		return B_OK;
1575 
1576 	ReadLocker areaReadLocker(heap->area_lock);
1577 	heap_area *area = heap->all_areas;
1578 	while (area) {
1579 		// since the all_areas list is ordered by base with the biggest
1580 		// base at the top, we need only find the first area with a base
1581 		// smaller than our address to become our only candidate for freeing
1582 		if (area->base <= (addr_t)address) {
1583 			if ((addr_t)address >= area->base + area->size) {
1584 				// none of the other areas can contain the address as the list
1585 				// is ordered
1586 				return B_ENTRY_NOT_FOUND;
1587 			}
1588 
1589 			// this area contains the allocation, we're done searching
1590 			break;
1591 		}
1592 
1593 		area = area->all_next;
1594 	}
1595 
1596 	if (area == NULL) {
1597 		// this address does not belong to us
1598 		return B_ENTRY_NOT_FOUND;
1599 	}
1600 
1601 	TRACE(("free(): asked to free pointer %p\n", address));
1602 
1603 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1604 		/ heap->page_size];
1605 
1606 	TRACE(("free(): page %p: bin_index %d, free_count %d\n", page,
1607 		page->bin_index, page->free_count));
1608 
1609 	if (page->bin_index > heap->bin_count) {
1610 		panic("free(): page %p: invalid bin_index %d\n", page, page->bin_index);
1611 		return B_ERROR;
1612 	}
1613 
1614 	if (page->bin_index < heap->bin_count) {
1615 		// small allocation
1616 		heap_bin *bin = &heap->bins[page->bin_index];
1617 
1618 #if PARANOID_KERNEL_FREE
1619 		if (((uint32 *)address)[1] == 0xdeadbeef) {
1620 			// This block looks like it was freed already, walk the free list
1621 			// on this page to make sure this address doesn't exist.
1622 			MutexLocker binLocker(bin->lock);
1623 			for (addr_t *temp = page->free_list; temp != NULL;
1624 					temp = (addr_t *)*temp) {
1625 				if (temp == address) {
1626 					panic("free(): address %p already exists in page free "
1627 						"list\n", address);
1628 					return B_ERROR;
1629 				}
1630 			}
1631 		}
1632 
1633 		// the first 4 bytes are overwritten with the next free list pointer
1634 		// later
1635 		uint32 *dead = (uint32 *)address;
1636 		for (uint32 i = 1; i < bin->element_size / sizeof(uint32); i++)
1637 			dead[i] = 0xdeadbeef;
1638 #endif
1639 
1640 		MutexLocker binLocker(bin->lock);
1641 		if (((addr_t)address - area->base - page->index
1642 			* heap->page_size) % bin->element_size != 0) {
1643 			panic("free(): passed invalid pointer %p supposed to be in bin for "
1644 				"element size %ld\n", address, bin->element_size);
1645 			return B_ERROR;
1646 		}
1647 
1648 		// add the address to the page free list
1649 		*(addr_t *)address = (addr_t)page->free_list;
1650 		page->free_list = (addr_t *)address;
1651 		page->free_count++;
1652 
1653 		if (page->free_count == bin->max_free_count) {
1654 			// we are now empty, remove the page from the bin list
1655 			MutexLocker pageLocker(heap->page_lock);
1656 			heap_unlink_page(page, &bin->page_list);
1657 			page->in_use = 0;
1658 			heap_link_page(page, &area->free_pages);
1659 			heap_free_pages_added(heap, area, 1);
1660 		} else if (page->free_count == 1) {
1661 			// we need to add ourselves to the page list of the bin
1662 			heap_link_page(page, &bin->page_list);
1663 		} else {
1664 			// we might need to move back in the free pages list
1665 			if (page->next && page->next->free_count < page->free_count) {
1666 				// move ourselves so the list stays ordered
1667 				heap_page *insert = page->next;
1668 				while (insert->next
1669 					&& insert->next->free_count < page->free_count)
1670 					insert = insert->next;
1671 
1672 				heap_unlink_page(page, &bin->page_list);
1673 
1674 				page->prev = insert;
1675 				page->next = insert->next;
1676 				if (page->next)
1677 					page->next->prev = page;
1678 				insert->next = page;
1679 			}
1680 		}
1681 	} else {
1682 		// large allocation, just return the pages to the page free list
1683 		uint32 allocationID = page->allocation_id;
1684 		uint32 maxPages = area->page_count - page->index;
1685 		uint32 pageCount = 0;
1686 
1687 		MutexLocker pageLocker(heap->page_lock);
1688 		for (uint32 i = 0; i < maxPages; i++) {
1689 			// loop until we find the end of this allocation
1690 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1691 				|| page[i].allocation_id != allocationID)
1692 				break;
1693 
1694 			// this page still belongs to the same allocation
1695 			page[i].in_use = 0;
1696 			page[i].allocation_id = 0;
1697 
1698 			// return it to the free list
1699 			heap_link_page(&page[i], &area->free_pages);
1700 			pageCount++;
1701 		}
1702 
1703 		heap_free_pages_added(heap, area, pageCount);
1704 	}
1705 
1706 	T(Free((addr_t)address));
1707 	areaReadLocker.Unlock();
1708 
1709 	if (heap->empty_areas > 1) {
1710 		WriteLocker areaWriteLocker(heap->area_lock);
1711 		MutexLocker pageLocker(heap->page_lock);
1712 
1713 		area_id areasToDelete[heap->empty_areas - 1];
1714 		int32 areasToDeleteIndex = 0;
1715 
1716 		area = heap->areas;
1717 		while (area != NULL && heap->empty_areas > 1) {
1718 			heap_area *next = area->next;
1719 			if (area->area >= 0
1720 				&& area->free_page_count == area->page_count
1721 				&& heap_remove_area(heap, area) == B_OK) {
1722 				areasToDelete[areasToDeleteIndex++] = area->area;
1723 				heap->empty_areas--;
1724 			}
1725 
1726 			area = next;
1727 		}
1728 
1729 		pageLocker.Unlock();
1730 		areaWriteLocker.Unlock();
1731 
1732 		for (int32 i = 0; i < areasToDeleteIndex; i++)
1733 			delete_area(areasToDelete[i]);
1734 	}
1735 
1736 	return B_OK;
1737 }
1738 
1739 
1740 #if KERNEL_HEAP_LEAK_CHECK
1741 extern "C" void
1742 heap_set_get_caller(heap_allocator* heap, addr_t (*getCaller)())
1743 {
1744 	heap->get_caller = getCaller;
1745 }
1746 #endif
1747 
1748 
1749 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
1750 
1751 
1752 static status_t
1753 heap_realloc(heap_allocator *heap, void *address, void **newAddress,
1754 	size_t newSize)
1755 {
1756 	ReadLocker areaReadLocker(heap->area_lock);
1757 	heap_area *area = heap->all_areas;
1758 	while (area) {
1759 		// since the all_areas list is ordered by base with the biggest
1760 		// base at the top, we need only find the first area with a base
1761 		// smaller than our address to become our only candidate for
1762 		// reallocating
1763 		if (area->base <= (addr_t)address) {
1764 			if ((addr_t)address >= area->base + area->size) {
1765 				// none of the other areas can contain the address as the list
1766 				// is ordered
1767 				return B_ENTRY_NOT_FOUND;
1768 			}
1769 
1770 			// this area contains the allocation, we're done searching
1771 			break;
1772 		}
1773 
1774 		area = area->all_next;
1775 	}
1776 
1777 	if (area == NULL) {
1778 		// this address does not belong to us
1779 		return B_ENTRY_NOT_FOUND;
1780 	}
1781 
1782 	TRACE(("realloc(address = %p, newSize = %lu)\n", address, newSize));
1783 
1784 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1785 		/ heap->page_size];
1786 	if (page->bin_index > heap->bin_count) {
1787 		panic("realloc(): page %p: invalid bin_index %d\n", page,
1788 			page->bin_index);
1789 		return B_ERROR;
1790 	}
1791 
1792 	// find out the size of the old allocation first
1793 	size_t minSize = 0;
1794 	size_t maxSize = 0;
1795 	if (page->bin_index < heap->bin_count) {
1796 		// this was a small allocation
1797 		heap_bin *bin = &heap->bins[page->bin_index];
1798 		maxSize = bin->element_size;
1799 		if (page->bin_index > 0)
1800 			minSize = heap->bins[page->bin_index - 1].element_size + 1;
1801 	} else {
1802 		// this was a large allocation
1803 		uint32 allocationID = page->allocation_id;
1804 		uint32 maxPages = area->page_count - page->index;
1805 		maxSize = heap->page_size;
1806 
1807 		MutexLocker pageLocker(heap->page_lock);
1808 		for (uint32 i = 1; i < maxPages; i++) {
1809 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1810 				|| page[i].allocation_id != allocationID)
1811 				break;
1812 
1813 			minSize += heap->page_size;
1814 			maxSize += heap->page_size;
1815 		}
1816 	}
1817 
1818 	areaReadLocker.Unlock();
1819 
1820 #if KERNEL_HEAP_LEAK_CHECK
1821 	newSize += sizeof(heap_leak_check_info);
1822 #endif
1823 
1824 	// does the new allocation simply fit in the old allocation?
1825 	if (newSize > minSize && newSize <= maxSize) {
1826 #if KERNEL_HEAP_LEAK_CHECK
1827 		// update the size info (the info is at the end so stays where it is)
1828 		heap_leak_check_info *info = (heap_leak_check_info *)((addr_t)address
1829 			+ maxSize - sizeof(heap_leak_check_info));
1830 		info->size = newSize - sizeof(heap_leak_check_info);
1831 		newSize -= sizeof(heap_leak_check_info);
1832 #endif
1833 
1834 		T(Reallocate((addr_t)address, (addr_t)address, newSize));
1835 		*newAddress = address;
1836 		return B_OK;
1837 	}
1838 
1839 #if KERNEL_HEAP_LEAK_CHECK
1840 	// new leak check info will be created with the malloc below
1841 	newSize -= sizeof(heap_leak_check_info);
1842 #endif
1843 
1844 	// if not, allocate a new chunk of memory
1845 	*newAddress = memalign(0, newSize);
1846 	T(Reallocate((addr_t)address, (addr_t)*newAddress, newSize));
1847 	if (*newAddress == NULL) {
1848 		// we tried but couldn't get the memory; still, the operation is done
1849 		return B_OK;
1850 	}
1851 
1852 	// copy the old data and free the old allocation
1853 	memcpy(*newAddress, address, min_c(maxSize, newSize));
1854 	heap_free(heap, address);
1855 	return B_OK;
1856 }
1857 
1858 
1859 inline uint32
1860 heap_index_for(size_t size, int32 cpu)
1861 {
1862 #if KERNEL_HEAP_LEAK_CHECK
1863 	// take the extra info size into account
1864 	size += sizeof(heap_leak_check_info_s);
1865 #endif
1866 
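	// pick the smallest heap class whose max_allocation_size fits the
	// request, then select that class's heap for the given CPU (modulo the
	// number of heaps actually created)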
1867 	uint32 index = 0;
1868 	for (; index < HEAP_CLASS_COUNT - 1; index++) {
1869 		if (size <= sHeapClasses[index].max_allocation_size)
1870 			break;
1871 	}
1872 
1873 	return (index + cpu * HEAP_CLASS_COUNT) % sHeapCount;
1874 }
1875 
1876 
1877 static void *
1878 memalign_nogrow(size_t alignment, size_t size)
1879 {
1880 	// use dedicated memory in the grow thread by default
1881 	if (thread_get_current_thread_id() == sHeapGrowThread) {
1882 		void *result = heap_memalign(sGrowHeap, alignment, size);
1883 		if (!sAddGrowHeap && heap_should_grow(sGrowHeap)) {
1884 			// hopefully the heap grower will manage to create a new heap
1885 			// before running out of private memory...
1886 			dprintf("heap: requesting new grow heap\n");
1887 			sAddGrowHeap = true;
1888 			release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
1889 		}
1890 
1891 		if (result != NULL)
1892 			return result;
1893 	}
1894 
1895 	// try public memory, there might be something available
1896 	void *result = NULL;
1897 	int32 cpuCount = MIN(smp_get_num_cpus(),
1898 		(int32)sHeapCount / HEAP_CLASS_COUNT);
1899 	int32 cpuNumber = smp_get_current_cpu();
1900 	for (int32 i = 0; i < cpuCount; i++) {
1901 		uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount);
1902 		heap_allocator *heap = sHeaps[heapIndex];
1903 		result = heap_memalign(heap, alignment, size);
1904 		if (result != NULL)
1905 			return result;
1906 	}
1907 
1908 	// no memory available
1909 	if (thread_get_current_thread_id() == sHeapGrowThread)
1910 		panic("heap: all heaps have run out of memory while growing\n");
1911 	else
1912 		dprintf("heap: all heaps have run out of memory\n");
1913 
1914 	return NULL;
1915 }
1916 
1917 
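// Creates a fully locked kernel area of the given size and adds it to the
// heap as an additional area.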
1918 static status_t
1919 heap_create_new_heap_area(heap_allocator *heap, const char *name, size_t size)
1920 {
1921 	void *address = NULL;
1922 	area_id heapArea = create_area(name, &address,
1923 		B_ANY_KERNEL_BLOCK_ADDRESS, size, B_FULL_LOCK,
1924 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1925 	if (heapArea < B_OK) {
1926 		TRACE(("heap: couldn't allocate heap area \"%s\"\n", name));
1927 		return heapArea;
1928 	}
1929 
1930 	heap_add_area(heap, heapArea, (addr_t)address, size);
1931 #if PARANOID_HEAP_VALIDATION
1932 	heap_validate_heap(heap);
1933 #endif
1934 	return B_OK;
1935 }
1936 
1937 
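// The dedicated grower thread: woken via sHeapGrowSem, it adds an area to
// the grow heap when requested and to every heap that either asked for a
// grow or is nearly full, then wakes waiters on sHeapGrownNotify.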
1938 static int32
1939 heap_grow_thread(void *)
1940 {
1941 	while (true) {
1942 		// wait for a request to grow the heap list
1943 		if (acquire_sem(sHeapGrowSem) < B_OK)
1944 			continue;
1945 
1946 		if (sAddGrowHeap) {
1947 			// the grow heap is going to run full soon, try to allocate a new
1948 			// one to make some room.
1949 			TRACE(("heap_grower: grow heaps will run out of memory soon\n"));
1950 			if (heap_create_new_heap_area(sGrowHeap, "additional grow heap",
1951 					HEAP_DEDICATED_GROW_SIZE) != B_OK)
1952 				dprintf("heap_grower: failed to create new grow heap area\n");
1953 		}
1954 
1955 		for (uint32 i = 0; i < sHeapCount; i++) {
1956 			heap_allocator *heap = sHeaps[i];
1957 			if (sLastGrowRequest[i] > sLastHandledGrowRequest[i]
1958 				|| heap_should_grow(heap)) {
1959 				// grow this heap if it is nearly full or if a grow was
1960 				// explicitly requested for this heap (happens when a large
1961 				// allocation cannot be fulfilled due to lack of contiguous
1962 				// pages)
1963 				if (heap_create_new_heap_area(heap, "additional heap",
1964 						HEAP_GROW_SIZE) != B_OK)
1965 					dprintf("heap_grower: failed to create new heap area\n");
1966 				sLastHandledGrowRequest[i] = sLastGrowRequest[i];
1967 			}
1968 		}
1969 
1970 		// notify anyone waiting for this request
1971 		release_sem_etc(sHeapGrownNotify, -1, B_RELEASE_ALL);
1972 	}
1973 
1974 	return 0;
1975 }
1976 
1977 
1978 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
1979 
1980 
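// Kernel daemon callback: drains the deferred free/delete lists. The lists
// are moved to the stack while the spinlock is held so the actual free()
// and delete calls happen without it.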
1981 static void
1982 deferred_deleter(void *arg, int iteration)
1983 {
1984 	// move entries and deletables to on-stack lists
1985 	InterruptsSpinLocker locker(sDeferredFreeListLock);
1986 	if (sDeferredFreeList.IsEmpty() && sDeferredDeletableList.IsEmpty())
1987 		return;
1988 
1989 	DeferredFreeList entries;
1990 	entries.MoveFrom(&sDeferredFreeList);
1991 
1992 	DeferredDeletableList deletables;
1993 	deletables.MoveFrom(&sDeferredDeletableList);
1994 
1995 	locker.Unlock();
1996 
1997 	// free the entries
1998 	while (DeferredFreeListEntry* entry = entries.RemoveHead())
1999 		free(entry);
2000 
2001 	// delete the deletables
2002 	while (DeferredDeletable* deletable = deletables.RemoveHead())
2003 		delete deletable;
2004 }
2005 
2006 
2007 //	#pragma mark -
2008 
2009 
2010 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
2011 
2012 
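// Splits the initial heap region among the configured heap classes and
// registers the heap related debugger commands.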
2013 status_t
2014 heap_init(addr_t base, size_t size)
2015 {
2016 	for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) {
2017 		size_t partSize = size * sHeapClasses[i].initial_percentage / 100;
2018 		sHeaps[i] = heap_create_allocator(sHeapClasses[i].name, base, partSize,
2019 			&sHeapClasses[i], false);
2020 		sLastGrowRequest[i] = sLastHandledGrowRequest[i] = 0;
2021 		base += partSize;
2022 		sHeapCount++;
2023 	}
2024 
2025 	// set up some debug commands
2026 	add_debugger_command_etc("heap", &dump_heap_list,
2027 		"Dump infos about the kernel heap(s)",
2028 		"[(\"grow\" | \"stats\" | <heap>)]\n"
2029 		"Dump infos about the kernel heap(s). If \"grow\" is specified, only\n"
2030 		"infos about the dedicated grow heap are printed. If \"stats\" is\n"
2031 		"given as the argument, currently only the heap count is printed.\n"
2032 		"If <heap> is given, it is interpreted as the address of the heap to\n"
2033 		"print infos about.\n", 0);
2034 #if !KERNEL_HEAP_LEAK_CHECK
2035 	add_debugger_command_etc("allocations", &dump_allocations,
2036 		"Dump current heap allocations",
2037 		"[\"stats\"] [<heap>]\n"
2038 		"If no parameters are given, all current alloactions are dumped.\n"
2039 		"If the optional argument \"stats\" is specified, only the allocation\n"
2040 		"counts and no individual allocations are printed\n"
2041 		"If a specific heap address is given, only allocations of this\n"
2042 		"allocator are dumped\n", 0);
2043 #else // !KERNEL_HEAP_LEAK_CHECK
2044 	add_debugger_command_etc("allocations", &dump_allocations,
2045 		"Dump current heap allocations",
2046 		"[(\"team\" | \"thread\") <id>] [\"caller\" <address>] [\"address\" <address>] [\"stats\"]\n"
2047 		"If no parameters are given, all current alloactions are dumped.\n"
2048 		"If \"team\", \"thread\", \"caller\", and/or \"address\" is specified as the first\n"
2049 		"argument, only allocations matching the team ID, thread ID, caller\n"
2050 		"address or allocated address given in the second argument are printed.\n"
2051 		"If the optional argument \"stats\" is specified, only the allocation\n"
2052 		"counts and no individual allocations are printed\n", 0);
2053 	add_debugger_command_etc("allocations_per_caller",
2054 		&dump_allocations_per_caller,
2055 		"Dump current heap allocations summed up per caller",
2056 		"[ \"-c\" ] [ -h <heap> ]\n"
2057 		"The current allocations will by summed up by caller (their count and\n"
2058 		"size) printed in decreasing order by size or, if \"-c\" is\n"
2059 		"specified, by allocation count. If given <heap> specifies the\n"
2060 		"address of the heap for which to print the allocations.\n", 0);
2061 #endif // KERNEL_HEAP_LEAK_CHECK
2062 	return B_OK;
2063 }
2064 
2065 
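// Called once areas can be created: sets up the dedicated grow heap and the
// small "VIP I/O" heap used for HEAP_PRIORITY_VIP allocations.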
2066 status_t
2067 heap_init_post_area()
2068 {
2069 	void *address = NULL;
2070 	area_id growHeapArea = create_area("dedicated grow heap", &address,
2071 		B_ANY_KERNEL_BLOCK_ADDRESS, HEAP_DEDICATED_GROW_SIZE, B_FULL_LOCK,
2072 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2073 	if (growHeapArea < 0) {
2074 		panic("heap_init_post_area(): couldn't allocate dedicate grow heap "
2075 			"area");
2076 		return growHeapArea;
2077 	}
2078 
2079 	sGrowHeap = heap_create_allocator("grow", (addr_t)address,
2080 		HEAP_DEDICATED_GROW_SIZE, &sHeapClasses[0], false);
2081 	if (sGrowHeap == NULL) {
2082 		panic("heap_init_post_area(): failed to create dedicated grow heap\n");
2083 		return B_ERROR;
2084 	}
2085 
2086 	// create the VIP heap
2087 	static const heap_class heapClass = {
2088 		"VIP I/O",					/* name */
2089 		100,						/* initial percentage */
2090 		B_PAGE_SIZE / 8,			/* max allocation size */
2091 		B_PAGE_SIZE,				/* page size */
2092 		8,							/* min bin size */
2093 		4,							/* bin alignment */
2094 		8,							/* min count per page */
2095 		16							/* max waste per page */
2096 	};
2097 
2098 	area_id vipHeapArea = create_area("VIP heap", &address,
2099 		B_ANY_KERNEL_ADDRESS, VIP_HEAP_SIZE, B_FULL_LOCK,
2100 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2101 	if (vipHeapArea < 0) {
2102 		panic("heap_init_post_area(): couldn't allocate VIP heap area");
2103 		return B_ERROR;
2104 	}
2105 
2106 	sVIPHeap = heap_create_allocator("VIP heap", (addr_t)address,
2107 		VIP_HEAP_SIZE, &heapClass, false);
2108 	if (sVIPHeap == NULL) {
2109 		panic("heap_init_post_area(): failed to create VIP heap\n");
2110 		return B_ERROR;
2111 	}
2112 
2113 	dprintf("heap_init_post_area(): created VIP heap: %p\n", sVIPHeap);
2114 
2115 	return B_OK;
2116 }
2117 
2118 
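// Creates the grow/grown semaphores and, in paranoid builds, blocks the
// address ranges that the 0xcccccccc/0xdeadbeef fill patterns point into so
// that use of uninitialized or freed heap memory faults early.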
2119 status_t
2120 heap_init_post_sem()
2121 {
2122 #if PARANOID_KERNEL_MALLOC
2123 	vm_block_address_range("uninitialized heap memory",
2124 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
2125 #endif
2126 #if PARANOID_KERNEL_FREE
2127 	vm_block_address_range("freed heap memory",
2128 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
2129 #endif
2130 
2131 	sHeapGrowSem = create_sem(0, "heap_grow_sem");
2132 	if (sHeapGrowSem < 0) {
2133 		panic("heap_init_post_sem(): failed to create heap grow sem\n");
2134 		return B_ERROR;
2135 	}
2136 
2137 	sHeapGrownNotify = create_sem(0, "heap_grown_notify");
2138 	if (sHeapGrownNotify < 0) {
2139 		panic("heap_init_post_sem(): failed to create heap grown notify sem\n");
2140 		return B_ERROR;
2141 	}
2142 
2143 	return B_OK;
2144 }
2145 
2146 
2147 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
2148 
2149 
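// Spawns the grower thread, creates additional per-CPU heaps if there is
// enough memory, and registers the deferred deleter with the kernel daemon.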
2150 status_t
2151 heap_init_post_thread()
2152 {
2153 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
2154 	sHeapGrowThread = spawn_kernel_thread(heap_grow_thread, "heap grower",
2155 		B_URGENT_PRIORITY, NULL);
2156 	if (sHeapGrowThread < 0) {
2157 		panic("heap_init_post_thread(): cannot create heap grow thread\n");
2158 		return sHeapGrowThread;
2159 	}
2160 
2161 	// create per-cpu heaps if there's enough memory
2162 	int32 heapCount = MIN(smp_get_num_cpus(),
2163 		(int32)vm_page_num_pages() / 60 / 1024);
2164 	for (int32 i = 1; i < heapCount; i++) {
2165 		addr_t base = 0;
2166 		size_t size = HEAP_GROW_SIZE * HEAP_CLASS_COUNT;
2167 		area_id perCPUHeapArea = create_area("per cpu initial heap",
2168 			(void **)&base, B_ANY_KERNEL_ADDRESS, size, B_FULL_LOCK,
2169 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2170 		if (perCPUHeapArea < 0)
2171 			break;
2172 
2173 		for (uint32 j = 0; j < HEAP_CLASS_COUNT; j++) {
2174 			int32 heapIndex = i * HEAP_CLASS_COUNT + j;
2175 			size_t partSize = size * sHeapClasses[j].initial_percentage / 100;
2176 			sHeaps[heapIndex] = heap_create_allocator(sHeapClasses[j].name,
2177 				base, partSize, &sHeapClasses[j], false);
2178 			sLastGrowRequest[heapIndex] = 0;
2179 			sLastHandledGrowRequest[heapIndex] = 0;
2180 			base += partSize;
2181 			sHeapCount++;
2182 		}
2183 	}
2184 
2185 	resume_thread(sHeapGrowThread);
2186 
2187 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
2188 
2189 	// run the deferred deleter roughly once a second
2190 	if (register_kernel_daemon(deferred_deleter, NULL, 10) != B_OK)
2191 		panic("heap_init_post_thread(): failed to init deferred deleter");
2192 
2193 	return B_OK;
2194 }
2195 
2196 
2197 //	#pragma mark - Public API
2198 
2199 
2200 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
2201 
2202 
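// Public allocation entry point. Requests above HEAP_AREA_USE_THRESHOLD get
// a dedicated area with an area_allocation_info header; everything else is
// served from the per-CPU heaps, waking or waiting for the grower as needed.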
2203 void *
2204 memalign(size_t alignment, size_t size)
2205 {
2206 	if (!gKernelStartup && !are_interrupts_enabled()) {
2207 		panic("memalign(): called with interrupts disabled\n");
2208 		return NULL;
2209 	}
2210 
2211 	if (!gKernelStartup && size > HEAP_AREA_USE_THRESHOLD) {
2212 		// don't even attempt such a huge allocation - use areas instead
2213 		size_t areaSize = ROUNDUP(size + sizeof(area_allocation_info)
2214 			+ alignment, B_PAGE_SIZE);
2215 		if (areaSize < size) {
2216 			// the size overflowed
2217 			return NULL;
2218 		}
2219 
2220 		void *address = NULL;
2221 		area_id allocationArea = create_area("memalign area", &address,
2222 			B_ANY_KERNEL_BLOCK_ADDRESS, areaSize, B_FULL_LOCK,
2223 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2224 		if (allocationArea < B_OK) {
2225 			dprintf("heap: failed to create area for huge allocation\n");
2226 			return NULL;
2227 		}
2228 
2229 		area_allocation_info *info = (area_allocation_info *)address;
2230 		info->magic = kAreaAllocationMagic;
2231 		info->area = allocationArea;
2232 		info->base = address;
2233 		info->size = areaSize;
2234 		info->allocation_size = size;
2235 		info->allocation_alignment = alignment;
2236 
2237 		address = (void *)((addr_t)address + sizeof(area_allocation_info));
2238 		if (alignment != 0) {
2239 			address = (void *)ROUNDUP((addr_t)address, alignment);
2240 			ASSERT((addr_t)address % alignment == 0);
2241 			ASSERT((addr_t)address + size - 1 < (addr_t)info + areaSize - 1);
2242 		}
2243 
2244 		TRACE(("heap: allocated area %ld for huge allocation of %lu bytes\n",
2245 			allocationArea, size));
2246 
2247 		info->allocation_base = address;
2248 
2249 #if PARANOID_KERNEL_MALLOC
2250 		memset(address, 0xcc, size);
2251 #endif
2252 		return address;
2253 	}
2254 
2255 	void *result = NULL;
2256 	bool shouldGrow = false;
2257 	int32 cpuCount = MIN(smp_get_num_cpus(),
2258 		(int32)sHeapCount / HEAP_CLASS_COUNT);
2259 	int32 cpuNumber = smp_get_current_cpu();
2260 	for (int32 i = 0; i < cpuCount; i++) {
2261 		uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount);
2262 		heap_allocator *heap = sHeaps[heapIndex];
2263 		result = heap_memalign(heap, alignment, size);
2264 		if (result != NULL) {
2265 			shouldGrow = heap_should_grow(heap);
2266 			break;
2267 		}
2268 
2269 #if PARANOID_HEAP_VALIDATION
2270 		heap_validate_heap(heap);
2271 #endif
2272 	}
2273 
2274 	if (result == NULL) {
2275 		// request an urgent grow and wait - we don't do it ourselves here to
2276 		// serialize growing through the grow thread, as otherwise multiple
2277 		// threads hitting this situation (likely when memory ran out) would
2278 		// all add areas
2279 		uint32 heapIndex = heap_index_for(size, smp_get_current_cpu());
2280 		sLastGrowRequest[heapIndex]++;
2281 		switch_sem(sHeapGrowSem, sHeapGrownNotify);
2282 
2283 		// and then try again
2284 		result = heap_memalign(sHeaps[heapIndex], alignment, size);
2285 	} else if (shouldGrow) {
2286 		// should grow sometime soon, notify the grower
2287 		release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
2288 	}
2289 
2290 	if (result == NULL)
2291 		panic("heap: kernel heap has run out of memory\n");
2292 	return result;
2293 }
2294 
2295 
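// Dispatches on the allocation flags: VIP allocations go to the VIP heap,
// allocations that must not wait or lock kernel space use the nogrow path,
// everything else falls through to plain memalign().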
2296 void *
2297 memalign_etc(size_t alignment, size_t size, uint32 flags)
2298 {
2299 	if ((flags & HEAP_PRIORITY_VIP) != 0)
2300 		return heap_memalign(sVIPHeap, alignment, size);
2301 
2302 	if ((flags & (HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE))
2303 			!= 0) {
2304 		return memalign_nogrow(alignment, size);
2305 	}
2306 
2307 	return memalign(alignment, size);
2308 }
2309 
2310 
2311 void
2312 free_etc(void *address, uint32 flags)
2313 {
2314 	if ((flags & HEAP_PRIORITY_VIP) != 0)
2315 		heap_free(sVIPHeap, address);
2316 	else
2317 		free(address);
2318 }
2319 
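/*
 * Illustrative usage only (not part of the original source); "bufferSize"
 * is a placeholder. A caller that must not wait for memory would pair the
 * flagged variants:
 *
 *	void *buffer = memalign_etc(0, bufferSize, HEAP_DONT_WAIT_FOR_MEMORY);
 *	if (buffer != NULL) {
 *		// ... use the buffer ...
 *		free_etc(buffer, 0);
 *	}
 */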
2320 
2321 void *
2322 malloc(size_t size)
2323 {
2324 	return memalign(0, size);
2325 }
2326 
2327 
2328 void
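// Tries this CPU's heaps first, then all others, the grow heap, the VIP
// heap and finally area backed huge allocations; panics if the address does
// not belong to any of them.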
2329 free(void *address)
2330 {
2331 	if (!gKernelStartup && !are_interrupts_enabled()) {
2332 		panic("free(): called with interrupts disabled\n");
2333 		return;
2334 	}
2335 
2336 	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2337 	for (uint32 i = 0; i < sHeapCount; i++) {
2338 		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2339 		if (heap_free(heap, address) == B_OK) {
2340 #if PARANOID_HEAP_VALIDATION
2341 			heap_validate_heap(heap);
2342 #endif
2343 			return;
2344 		}
2345 	}
2346 
2347 	// maybe it was allocated from the dedicated grow heap
2348 	if (heap_free(sGrowHeap, address) == B_OK)
2349 		return;
2350 
2351 	// or maybe it was allocated from the VIP heap
2352 	if (heap_free(sVIPHeap, address) == B_OK)
2353 		return;
2354 
2355 	// or maybe it was a huge allocation using an area
2356 	area_info areaInfo;
2357 	area_id area = area_for(address);
2358 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2359 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2360 
2361 		// just make extra sure it was allocated by us
2362 		if (info->magic == kAreaAllocationMagic && info->area == area
2363 			&& info->size == areaInfo.size && info->base == areaInfo.address
2364 			&& info->allocation_size < areaInfo.size) {
2365 			delete_area(area);
2366 			TRACE(("free(): freed huge allocation by deleting area %ld\n",
2367 				area));
2368 			return;
2369 		}
2370 	}
2371 
2372 	panic("free(): free failed for address %p\n", address);
2373 }
2374 
2375 
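// Resizes in place when the new size still fits the existing bin, page run
// or backing area; otherwise allocates a new block, copies the data and
// frees the old allocation.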
2376 void *
2377 realloc(void *address, size_t newSize)
2378 {
2379 	if (!gKernelStartup && !are_interrupts_enabled()) {
2380 		panic("realloc(): called with interrupts disabled\n");
2381 		return NULL;
2382 	}
2383 
2384 	if (address == NULL)
2385 		return memalign(0, newSize);
2386 
2387 	if (newSize == 0) {
2388 		free(address);
2389 		return NULL;
2390 	}
2391 
2392 	void *newAddress = NULL;
2393 	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2394 	for (uint32 i = 0; i < sHeapCount; i++) {
2395 		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2396 		if (heap_realloc(heap, address, &newAddress, newSize) == B_OK) {
2397 #if PARANOID_HEAP_VALIDATION
2398 			heap_validate_heap(heap);
2399 #endif
2400 			return newAddress;
2401 		}
2402 	}
2403 
2404 	// maybe it was allocated from the dedicated grow heap
2405 	if (heap_realloc(sGrowHeap, address, &newAddress, newSize) == B_OK)
2406 		return newAddress;
2407 
2408 	// or maybe it was a huge allocation using an area
2409 	area_info areaInfo;
2410 	area_id area = area_for(address);
2411 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2412 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2413 
2414 		// just make extra sure it was allocated by us
2415 		if (info->magic == kAreaAllocationMagic && info->area == area
2416 			&& info->size == areaInfo.size && info->base == areaInfo.address
2417 			&& info->allocation_size < areaInfo.size) {
2418 			size_t available = info->size - ((addr_t)info->allocation_base
2419 				- (addr_t)info->base);
2420 
2421 			if (available >= newSize) {
2422 				// there is enough room available for the newSize
2423 				TRACE(("realloc(): new size %ld fits in old area %ld with %ld "
2424 					"available\n", newSize, area, available));
2425 				info->allocation_size = newSize;
2426 				return address;
2427 			}
2428 
2429 			// have to allocate/copy/free - TODO maybe resize the area instead?
2430 			newAddress = memalign(0, newSize);
2431 			if (newAddress == NULL) {
2432 				dprintf("realloc(): failed to allocate new block of %ld bytes\n",
2433 					newSize);
2434 				return NULL;
2435 			}
2436 
2437 			memcpy(newAddress, address, min_c(newSize, info->allocation_size));
2438 			delete_area(area);
2439 			TRACE(("realloc(): allocated new block %p for size %ld and deleted "
2440 				"old area %ld\n", newAddress, newSize, area));
2441 			return newAddress;
2442 		}
2443 	}
2444 
2445 	panic("realloc(): failed to realloc address %p to size %lu\n", address,
2446 		newSize);
2447 	return NULL;
2448 }
2449 
2450 
2451 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
2452 
2453 
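// Allocates zero initialized memory for numElements elements of the given
// size.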
2454 void *
2455 calloc(size_t numElements, size_t size)
2456 {
2457 	// guard against overflow of numElements * size
	if (size != 0 && numElements > ((size_t)-1) / size)
		return NULL;

	void *address = memalign(0, numElements * size);
2458 	if (address != NULL)
2459 		memset(address, 0, numElements * size);
2460 
2461 	return address;
2462 }
2463 
2464 
2465 void
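// Queues a block for freeing by the deferred deleter; only a spinlock is
// taken, so this is safe from contexts that cannot call free() directly.
// The list entry is placement-constructed inside the block itself.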
2466 deferred_free(void *block)
2467 {
2468 	if (block == NULL)
2469 		return;
2470 
2471 	DeferredFreeListEntry *entry = new(block) DeferredFreeListEntry;
2472 
2473 	InterruptsSpinLocker _(sDeferredFreeListLock);
2474 	sDeferredFreeList.Add(entry);
2475 }
2476 
2477 
2478 void *
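// Reference counted allocations: an int32 count is stored directly in front
// of the returned pointer. malloc_referenced() starts the count at 1,
// malloc_referenced_acquire() increments it and malloc_referenced_release()
// frees the block when the last reference is dropped.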
2479 malloc_referenced(size_t size)
2480 {
2481 	int32 *referencedData = (int32 *)malloc(size + 4);
2482 	if (referencedData == NULL)
2483 		return NULL;
2484 
2485 	*referencedData = 1;
2486 	return referencedData + 1;
2487 }
2488 
2489 
2490 void *
2491 malloc_referenced_acquire(void *data)
2492 {
2493 	if (data != NULL) {
2494 		int32 *referencedData = (int32 *)data - 1;
2495 		atomic_add(referencedData, 1);
2496 	}
2497 
2498 	return data;
2499 }
2500 
2501 
2502 void
2503 malloc_referenced_release(void *data)
2504 {
2505 	if (data == NULL)
2506 		return;
2507 
2508 	int32 *referencedData = (int32 *)data - 1;
2509 	if (atomic_add(referencedData, -1) < 2)
2510 		free(referencedData);
2511 }
2512 
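/*
 * Illustrative usage only (not part of the original source); "dataSize" is
 * a placeholder:
 *
 *	void *data = malloc_referenced(dataSize);	// count == 1
 *	malloc_referenced_acquire(data);			// count == 2
 *	malloc_referenced_release(data);			// count == 1
 *	malloc_referenced_release(data);			// count == 0, block is freed
 */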
2513 
2514 DeferredDeletable::~DeferredDeletable()
2515 {
2516 }
2517 
2518 
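// Queues a DeferredDeletable for deletion by the deferred deleter daemon;
// like deferred_free() this only takes a spinlock.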
2519 void
2520 deferred_delete(DeferredDeletable *deletable)
2521 {
2522 	if (deletable == NULL)
2523 		return;
2524 
2525 	InterruptsSpinLocker _(sDeferredFreeListLock);
2526 	sDeferredDeletableList.Add(deletable);
2527 }
2528