xref: /haiku/src/system/kernel/heap.cpp (revision 19ae20e67e91fc09cc9fc5c0e60e21e24e7a53eb)
1 /*
2  * Copyright 2008-2010, Michael Lotz, mmlr@mlotz.ch.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <arch/debug.h>
12 #include <debug.h>
13 #include <elf.h>
14 #include <heap.h>
15 #include <int.h>
16 #include <kernel.h>
17 #include <lock.h>
18 #include <string.h>
19 #include <team.h>
20 #include <thread.h>
21 #include <tracing.h>
22 #include <util/AutoLock.h>
23 #include <vm/vm.h>
24 #include <vm/vm_page.h>
25 
26 
27 //#define TRACE_HEAP
28 #ifdef TRACE_HEAP
29 #	define TRACE(x) dprintf x
30 #else
31 #	define TRACE(x) ;
32 #endif
33 
34 
35 #if USE_SLAB_ALLOCATOR_FOR_MALLOC
36 #	undef KERNEL_HEAP_LEAK_CHECK
37 #endif
38 
39 
40 #if KERNEL_HEAP_LEAK_CHECK
41 typedef struct heap_leak_check_info_s {
42 	addr_t		caller;
43 	size_t		size;
44 	thread_id	thread;
45 	team_id		team;
46 } heap_leak_check_info;
47 
48 struct caller_info {
49 	addr_t		caller;
50 	uint32		count;
51 	uint32		size;
52 };
53 
54 static const int32 kCallerInfoTableSize = 1024;
55 static caller_info sCallerInfoTable[kCallerInfoTableSize];
56 static int32 sCallerInfoCount = 0;
57 #endif	// KERNEL_HEAP_LEAK_CHECK
58 
59 
60 typedef struct heap_page_s heap_page;
61 
62 
63 typedef struct heap_area_s {
64 	area_id			area;
65 
66 	addr_t			base;
67 	size_t			size;
68 
69 	uint32			page_count;
70 	uint32			free_page_count;
71 
72 	heap_page *		free_pages;
73 	heap_page *		page_table;
74 
75 	heap_area_s *	prev;
76 	heap_area_s *	next;
77 	heap_area_s *	all_next;
78 } heap_area;
79 
80 
81 #define MAX_BIN_COUNT	31	// depends on the size of the bin_index field
82 
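// A heap_page describes one heap->page_size sized chunk of a heap_area. A page
// is either assigned to a bin (bin_index < bin_count), in which case it is
// carved into element_size slots tracked through free_list, empty_index and
// free_count, or it is part of a multi-page allocation (bin_index == bin_count),
// in which case all pages of that allocation carry the same allocation_id.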
83 typedef struct heap_page_s {
84 	heap_area *		area;
85 	uint16			index;
86 	uint16			bin_index : 5;
87 	uint16			free_count : 10;
88 	uint16			in_use : 1;
89 	heap_page_s *	next;
90 	heap_page_s *	prev;
91 	union {
92 		uint16			empty_index;
93 		uint16			allocation_id; // used for bin == bin_count allocations
94 	};
95 	addr_t *		free_list;
96 } heap_page;
97 
98 
99 typedef struct heap_bin_s {
100 	mutex		lock;
101 	uint32		element_size;
102 	uint16		max_free_count;
103 	heap_page *	page_list; // sorted so that the desired page is always first
104 } heap_bin;
105 
106 
107 struct heap_allocator_s {
108 	rw_lock		area_lock;
109 	mutex		page_lock;
110 
111 	const char *name;
112 	uint32		bin_count;
113 	uint32		page_size;
114 
115 	uint32		total_pages;
116 	uint32		total_free_pages;
117 	uint32		empty_areas;
118 
119 #if KERNEL_HEAP_LEAK_CHECK
120 	addr_t		(*get_caller)();
121 #endif
122 
123 	heap_bin *	bins;
124 	heap_area *	areas; // sorted so that the desired area is always first
125 	heap_area *	all_areas; // all areas including full ones
126 };
127 
128 
129 static const uint32 kAreaAllocationMagic = 'AAMG';
130 typedef struct area_allocation_info_s {
131 	area_id		area;
132 	void *		base;
133 	uint32		magic;
134 	size_t		size;
135 	size_t		allocation_size;
136 	size_t		allocation_alignment;
137 	void *		allocation_base;
138 } area_allocation_info;
139 
140 
141 struct DeferredFreeListEntry : SinglyLinkedListLinkImpl<DeferredFreeListEntry> {
142 };
143 
144 
145 typedef SinglyLinkedList<DeferredFreeListEntry> DeferredFreeList;
146 typedef SinglyLinkedList<DeferredDeletable> DeferredDeletableList;
147 
148 
149 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
150 
151 #define VIP_HEAP_SIZE	1024 * 1024
152 
153 // Heap class configuration
154 #define HEAP_CLASS_COUNT 3
155 static const heap_class sHeapClasses[HEAP_CLASS_COUNT] = {
156 	{
157 		"small",					/* name */
158 		50,							/* initial percentage */
159 		B_PAGE_SIZE / 8,			/* max allocation size */
160 		B_PAGE_SIZE,				/* page size */
161 		8,							/* min bin size */
162 		4,							/* bin alignment */
163 		8,							/* min count per page */
164 		16							/* max waste per page */
165 	},
166 	{
167 		"medium",					/* name */
168 		30,							/* initial percentage */
169 		B_PAGE_SIZE * 2,			/* max allocation size */
170 		B_PAGE_SIZE * 8,			/* page size */
171 		B_PAGE_SIZE / 8,			/* min bin size */
172 		32,							/* bin alignment */
173 		4,							/* min count per page */
174 		64							/* max waste per page */
175 	},
176 	{
177 		"large",					/* name */
178 		20,							/* initial percentage */
179 		HEAP_AREA_USE_THRESHOLD,	/* max allocation size */
180 		B_PAGE_SIZE * 16,			/* page size */
181 		B_PAGE_SIZE * 2,			/* min bin size */
182 		128,						/* bin alignment */
183 		1,							/* min count per page */
184 		256							/* max waste per page */
185 	}
186 };
187 
188 
189 static uint32 sHeapCount;
190 static heap_allocator *sHeaps[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
191 static uint32 *sLastGrowRequest[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
192 static uint32 *sLastHandledGrowRequest[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
193 
194 static heap_allocator *sVIPHeap;
195 static heap_allocator *sGrowHeap = NULL;
196 static thread_id sHeapGrowThread = -1;
197 static sem_id sHeapGrowSem = -1;
198 static sem_id sHeapGrownNotify = -1;
199 static bool sAddGrowHeap = false;
200 
201 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
202 
203 static DeferredFreeList sDeferredFreeList;
204 static DeferredDeletableList sDeferredDeletableList;
205 static spinlock sDeferredFreeListLock;
206 
207 
208 
209 // #pragma mark - Tracing
210 
211 #if KERNEL_HEAP_TRACING
212 namespace KernelHeapTracing {
213 
214 class Allocate : public AbstractTraceEntry {
215 	public:
216 		Allocate(addr_t address, size_t size)
217 			:	fAddress(address),
218 				fSize(size)
219 		{
220 			Initialized();
221 		}
222 
223 		virtual void AddDump(TraceOutput &out)
224 		{
225 			out.Print("heap allocate: 0x%08lx (%lu bytes)", fAddress, fSize);
226 		}
227 
228 	private:
229 		addr_t	fAddress;
230 		size_t	fSize;
231 };
232 
233 
234 class Reallocate : public AbstractTraceEntry {
235 	public:
236 		Reallocate(addr_t oldAddress, addr_t newAddress, size_t newSize)
237 			:	fOldAddress(oldAddress),
238 				fNewAddress(newAddress),
239 				fNewSize(newSize)
240 		{
241 			Initialized();
242 		};
243 
244 		virtual void AddDump(TraceOutput &out)
245 		{
246 			out.Print("heap reallocate: 0x%08lx -> 0x%08lx (%lu bytes)",
247 				fOldAddress, fNewAddress, fNewSize);
248 		}
249 
250 	private:
251 		addr_t	fOldAddress;
252 		addr_t	fNewAddress;
253 		size_t	fNewSize;
254 };
255 
256 
257 class Free : public AbstractTraceEntry {
258 	public:
259 		Free(addr_t address)
260 			:	fAddress(address)
261 		{
262 			Initialized();
263 		};
264 
265 		virtual void AddDump(TraceOutput &out)
266 		{
267 			out.Print("heap free: 0x%08lx", fAddress);
268 		}
269 
270 	private:
271 		addr_t	fAddress;
272 };
273 
274 
275 } // namespace KernelHeapTracing
276 
277 #	define T(x)	if (!gKernelStartup) new(std::nothrow) KernelHeapTracing::x;
278 #else
279 #	define T(x)	;
280 #endif
281 
282 
283 // #pragma mark - Debug functions
284 
285 
286 #if KERNEL_HEAP_LEAK_CHECK
287 static addr_t
288 get_caller()
289 {
290 	// Find the first return address outside of the allocator code. Note that
291 	// this makes certain assumptions about how the code for the functions
292 	// ends up in the kernel object.
293 	addr_t returnAddresses[5];
294 	int32 depth = arch_debug_get_stack_trace(returnAddresses, 5, 0, 1,
295 		STACK_TRACE_KERNEL);
296 	for (int32 i = 0; i < depth; i++) {
297 		if (returnAddresses[i] < (addr_t)&get_caller
298 			|| returnAddresses[i] > (addr_t)&malloc_referenced_release) {
299 			return returnAddresses[i];
300 		}
301 	}
302 
303 	return 0;
304 }
305 #endif
306 
307 
308 static void
309 dump_page(heap_page *page)
310 {
311 	uint32 count = 0;
312 	for (addr_t *temp = page->free_list; temp != NULL; temp = (addr_t *)*temp)
313 		count++;
314 
315 	kprintf("\t\tpage %p: bin_index: %u; free_count: %u; empty_index: %u; "
316 		"free_list %p (%lu entr%s)\n", page, page->bin_index, page->free_count,
317 		page->empty_index, page->free_list, count, count == 1 ? "y" : "ies");
318 }
319 
320 
321 static void
322 dump_bin(heap_bin *bin)
323 {
324 	uint32 count = 0;
325 	for (heap_page *page = bin->page_list; page != NULL; page = page->next)
326 		count++;
327 
328 	kprintf("\telement_size: %lu; max_free_count: %u; page_list %p (%lu pages"
329 		");\n", bin->element_size, bin->max_free_count, bin->page_list, count);
330 
331 	for (heap_page *page = bin->page_list; page != NULL; page = page->next)
332 		dump_page(page);
333 }
334 
335 
336 static void
337 dump_bin_list(heap_allocator *heap)
338 {
339 	for (uint32 i = 0; i < heap->bin_count; i++)
340 		dump_bin(&heap->bins[i]);
341 	kprintf("\n");
342 }
343 
344 
345 static void
346 dump_allocator_areas(heap_allocator *heap)
347 {
348 	heap_area *area = heap->all_areas;
349 	while (area) {
350 		kprintf("\tarea %p: area: %ld; base: 0x%08lx; size: %lu; page_count: "
351 			"%lu; free_pages: %p (%lu entr%s)\n", area, area->area, area->base,
352 			area->size, area->page_count, area->free_pages,
353 			area->free_page_count, area->free_page_count == 1 ? "y" : "ies");
354 		area = area->all_next;
355 	}
356 
357 	kprintf("\n");
358 }
359 
360 
361 static void
362 dump_allocator(heap_allocator *heap, bool areas, bool bins)
363 {
364 	kprintf("allocator %p: name: %s; page_size: %lu; bin_count: %lu; pages: "
365 		"%lu; free_pages: %lu; empty_areas: %lu\n", heap, heap->name,
366 		heap->page_size, heap->bin_count, heap->total_pages,
367 		heap->total_free_pages, heap->empty_areas);
368 
369 	if (areas)
370 		dump_allocator_areas(heap);
371 	if (bins)
372 		dump_bin_list(heap);
373 }
374 
375 
376 static int
377 dump_heap_list(int argc, char **argv)
378 {
379 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
380 	if (argc == 2 && strcmp(argv[1], "grow") == 0) {
381 		// only dump dedicated grow heap info
382 		kprintf("dedicated grow heap:\n");
383 		dump_allocator(sGrowHeap, true, true);
384 		return 0;
385 	}
386 #endif
387 
388 	bool stats = false;
389 	int i = 1;
390 
391 	if (strcmp(argv[1], "stats") == 0) {
392 		stats = true;
393 		i++;
394 	}
395 
396 	uint64 heapAddress = 0;
397 	if (i < argc && !evaluate_debug_expression(argv[i], &heapAddress, true)) {
398 		print_debugger_command_usage(argv[0]);
399 		return 0;
400 	}
401 
402 	if (heapAddress == 0) {
403 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
404 		// dump default kernel heaps
405 		for (uint32 i = 0; i < sHeapCount; i++)
406 			dump_allocator(sHeaps[i], !stats, !stats);
407 #else
408 		print_debugger_command_usage(argv[0]);
409 #endif
410 	} else {
411 		// dump specified heap
412 		dump_allocator((heap_allocator*)(addr_t)heapAddress, !stats, !stats);
413 	}
414 
415 	return 0;
416 }
417 
418 
419 #if !KERNEL_HEAP_LEAK_CHECK
420 
421 static int
422 dump_allocations(int argc, char **argv)
423 {
424 	uint64 heapAddress = 0;
425 	bool statsOnly = false;
426 	for (int32 i = 1; i < argc; i++) {
427 		if (strcmp(argv[i], "stats") == 0)
428 			statsOnly = true;
429 		else if (!evaluate_debug_expression(argv[i], &heapAddress, true)) {
430 			print_debugger_command_usage(argv[0]);
431 			return 0;
432 		}
433 	}
434 
435 	size_t totalSize = 0;
436 	uint32 totalCount = 0;
437 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
438 	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
439 		heap_allocator *heap = sHeaps[heapIndex];
440 		if (heapAddress != 0)
441 			heap = (heap_allocator *)(addr_t)heapAddress;
442 #else
443 	while (true) {
444 		heap_allocator *heap = (heap_allocator *)(addr_t)heapAddress;
445 		if (heap == NULL) {
446 			print_debugger_command_usage(argv[0]);
447 			return 0;
448 		}
449 #endif
450 #if 0
451 	}
452 #endif
453 
454 		// go through all the pages in all the areas
455 		heap_area *area = heap->all_areas;
456 		while (area) {
457 			for (uint32 i = 0; i < area->page_count; i++) {
458 				heap_page *page = &area->page_table[i];
459 				if (!page->in_use)
460 					continue;
461 
462 				addr_t base = area->base + i * heap->page_size;
463 				if (page->bin_index < heap->bin_count) {
464 					// page is used by a small allocation bin
465 					uint32 elementCount = page->empty_index;
466 					size_t elementSize
467 						= heap->bins[page->bin_index].element_size;
468 					for (uint32 j = 0; j < elementCount;
469 							j++, base += elementSize) {
470 						// walk the free list to see if this element is in use
471 						bool elementInUse = true;
472 						for (addr_t *temp = page->free_list; temp != NULL;
473 								temp = (addr_t *)*temp) {
474 							if ((addr_t)temp == base) {
475 								elementInUse = false;
476 								break;
477 							}
478 						}
479 
480 						if (!elementInUse)
481 							continue;
482 
483 						if (!statsOnly) {
484 							kprintf("address: 0x%08lx; size: %lu bytes\n",
485 								base, elementSize);
486 						}
487 
488 						totalSize += elementSize;
489 						totalCount++;
490 					}
491 				} else {
492 					// page is used by a big allocation, find the page count
493 					uint32 pageCount = 1;
494 					while (i + pageCount < area->page_count
495 						&& area->page_table[i + pageCount].in_use
496 						&& area->page_table[i + pageCount].bin_index
497 							== heap->bin_count
498 						&& area->page_table[i + pageCount].allocation_id
499 							== page->allocation_id)
500 						pageCount++;
501 
502 					size_t size = pageCount * heap->page_size;
503 
504 					if (!statsOnly) {
505 						kprintf("address: 0x%08lx; size: %lu bytes\n", base,
506 							size);
507 					}
508 
509 					totalSize += size;
510 					totalCount++;
511 
512 					// skip the allocated pages
513 					i += pageCount - 1;
514 				}
515 			}
516 
517 			area = area->all_next;
518 		}
519 
520 		if (heapAddress != 0)
521 			break;
522 	}
523 
524 	kprintf("total allocations: %lu; total bytes: %lu\n", totalCount, totalSize);
525 	return 0;
526 }
527 
528 #else // !KERNEL_HEAP_LEAK_CHECK
529 
530 static int
531 dump_allocations(int argc, char **argv)
532 {
533 	team_id team = -1;
534 	thread_id thread = -1;
535 	addr_t caller = 0;
536 	addr_t address = 0;
537 	bool statsOnly = false;
538 
539 	for (int32 i = 1; i < argc; i++) {
540 		if (strcmp(argv[i], "team") == 0)
541 			team = parse_expression(argv[++i]);
542 		else if (strcmp(argv[i], "thread") == 0)
543 			thread = parse_expression(argv[++i]);
544 		else if (strcmp(argv[i], "caller") == 0)
545 			caller = parse_expression(argv[++i]);
546 		else if (strcmp(argv[i], "address") == 0)
547 			address = parse_expression(argv[++i]);
548 		else if (strcmp(argv[i], "stats") == 0)
549 			statsOnly = true;
550 		else {
551 			print_debugger_command_usage(argv[0]);
552 			return 0;
553 		}
554 	}
555 
556 	size_t totalSize = 0;
557 	uint32 totalCount = 0;
558 	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
559 		heap_allocator *heap = sHeaps[heapIndex];
560 
561 		// go through all the pages in all the areas
562 		heap_area *area = heap->all_areas;
563 		while (area) {
564 			heap_leak_check_info *info = NULL;
565 			for (uint32 i = 0; i < area->page_count; i++) {
566 				heap_page *page = &area->page_table[i];
567 				if (!page->in_use)
568 					continue;
569 
570 				addr_t base = area->base + i * heap->page_size;
571 				if (page->bin_index < heap->bin_count) {
572 					// page is used by a small allocation bin
573 					uint32 elementCount = page->empty_index;
574 					size_t elementSize
575 						= heap->bins[page->bin_index].element_size;
576 					for (uint32 j = 0; j < elementCount;
577 							j++, base += elementSize) {
578 						// walk the free list to see if this element is in use
579 						bool elementInUse = true;
580 						for (addr_t *temp = page->free_list; temp != NULL;
581 								temp = (addr_t *)*temp) {
582 							if ((addr_t)temp == base) {
583 								elementInUse = false;
584 								break;
585 							}
586 						}
587 
588 						if (!elementInUse)
589 							continue;
590 
591 						info = (heap_leak_check_info *)(base + elementSize
592 							- sizeof(heap_leak_check_info));
593 
594 						if ((team == -1 || info->team == team)
595 							&& (thread == -1 || info->thread == thread)
596 							&& (caller == 0 || info->caller == caller)
597 							&& (address == 0 || base == address)) {
598 							// interesting...
599 							if (!statsOnly) {
600 								kprintf("team: % 6ld; thread: % 6ld; "
601 									"address: 0x%08lx; size: %lu bytes; "
602 									"caller: %#lx\n", info->team, info->thread,
603 									base, info->size, info->caller);
604 							}
605 
606 							totalSize += info->size;
607 							totalCount++;
608 						}
609 					}
610 				} else {
611 					// page is used by a big allocation, find the page count
612 					uint32 pageCount = 1;
613 					while (i + pageCount < area->page_count
614 						&& area->page_table[i + pageCount].in_use
615 						&& area->page_table[i + pageCount].bin_index
616 							== heap->bin_count
617 						&& area->page_table[i + pageCount].allocation_id
618 							== page->allocation_id)
619 						pageCount++;
620 
621 					info = (heap_leak_check_info *)(base + pageCount
622 						* heap->page_size - sizeof(heap_leak_check_info));
623 
624 					if ((team == -1 || info->team == team)
625 						&& (thread == -1 || info->thread == thread)
626 						&& (caller == 0 || info->caller == caller)
627 						&& (address == 0 || base == address)) {
628 						// interesting...
629 						if (!statsOnly) {
630 							kprintf("team: % 6ld; thread: % 6ld;"
631 								" address: 0x%08lx; size: %lu bytes;"
632 								" caller: %#lx\n", info->team, info->thread,
633 								base, info->size, info->caller);
634 						}
635 
636 						totalSize += info->size;
637 						totalCount++;
638 					}
639 
640 					// skip the allocated pages
641 					i += pageCount - 1;
642 				}
643 			}
644 
645 			area = area->all_next;
646 		}
647 	}
648 
649 	kprintf("total allocations: %lu; total bytes: %lu\n", totalCount,
650 		totalSize);
651 	return 0;
652 }
653 
654 
655 static caller_info*
656 get_caller_info(addr_t caller)
657 {
658 	// find the caller info
659 	for (int32 i = 0; i < sCallerInfoCount; i++) {
660 		if (caller == sCallerInfoTable[i].caller)
661 			return &sCallerInfoTable[i];
662 	}
663 
664 	// not found, add a new entry, if there are free slots
665 	if (sCallerInfoCount >= kCallerInfoTableSize)
666 		return NULL;
667 
668 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
669 	info->caller = caller;
670 	info->count = 0;
671 	info->size = 0;
672 
673 	return info;
674 }
675 
676 
677 static int
678 caller_info_compare_size(const void* _a, const void* _b)
679 {
680 	const caller_info* a = (const caller_info*)_a;
681 	const caller_info* b = (const caller_info*)_b;
682 	return (int)(b->size - a->size);
683 }
684 
685 
686 static int
687 caller_info_compare_count(const void* _a, const void* _b)
688 {
689 	const caller_info* a = (const caller_info*)_a;
690 	const caller_info* b = (const caller_info*)_b;
691 	return (int)(b->count - a->count);
692 }
693 
694 
695 static bool
696 analyze_allocation_callers(heap_allocator *heap)
697 {
698 	// go through all the pages in all the areas
699 	heap_area *area = heap->all_areas;
700 	while (area) {
701 		heap_leak_check_info *info = NULL;
702 		for (uint32 i = 0; i < area->page_count; i++) {
703 			heap_page *page = &area->page_table[i];
704 			if (!page->in_use)
705 				continue;
706 
707 			addr_t base = area->base + i * heap->page_size;
708 			if (page->bin_index < heap->bin_count) {
709 				// page is used by a small allocation bin
710 				uint32 elementCount = page->empty_index;
711 				size_t elementSize = heap->bins[page->bin_index].element_size;
712 				for (uint32 j = 0; j < elementCount; j++, base += elementSize) {
713 					// walk the free list to see if this element is in use
714 					bool elementInUse = true;
715 					for (addr_t *temp = page->free_list; temp != NULL;
716 						temp = (addr_t *)*temp) {
717 						if ((addr_t)temp == base) {
718 							elementInUse = false;
719 							break;
720 						}
721 					}
722 
723 					if (!elementInUse)
724 						continue;
725 
726 					info = (heap_leak_check_info *)(base + elementSize
727 						- sizeof(heap_leak_check_info));
728 
729 					caller_info *callerInfo = get_caller_info(info->caller);
730 					if (callerInfo == NULL) {
731 						kprintf("out of space for caller infos\n");
732 						return false;
733 					}
734 
735 					callerInfo->count++;
736 					callerInfo->size += info->size;
737 				}
738 			} else {
739 				// page is used by a big allocation, find the page count
740 				uint32 pageCount = 1;
741 				while (i + pageCount < area->page_count
742 					&& area->page_table[i + pageCount].in_use
743 					&& area->page_table[i + pageCount].bin_index
744 						== heap->bin_count
745 					&& area->page_table[i + pageCount].allocation_id
746 						== page->allocation_id) {
747 					pageCount++;
748 				}
749 
750 				info = (heap_leak_check_info *)(base + pageCount
751 					* heap->page_size - sizeof(heap_leak_check_info));
752 
753 				caller_info *callerInfo = get_caller_info(info->caller);
754 				if (callerInfo == NULL) {
755 					kprintf("out of space for caller infos\n");
756 					return false;
757 				}
758 
759 				callerInfo->count++;
760 				callerInfo->size += info->size;
761 
762 				// skip the allocated pages
763 				i += pageCount - 1;
764 			}
765 		}
766 
767 		area = area->all_next;
768 	}
769 
770 	return true;
771 }
772 
773 
774 static int
775 dump_allocations_per_caller(int argc, char **argv)
776 {
777 	bool sortBySize = true;
778 	heap_allocator *heap = NULL;
779 
780 	for (int32 i = 1; i < argc; i++) {
781 		if (strcmp(argv[i], "-c") == 0) {
782 			sortBySize = false;
783 		} else if (strcmp(argv[i], "-h") == 0) {
784 			uint64 heapAddress;
785 			if (++i >= argc
786 				|| !evaluate_debug_expression(argv[i], &heapAddress, true)) {
787 				print_debugger_command_usage(argv[0]);
788 				return 0;
789 			}
790 
791 			heap = (heap_allocator*)(addr_t)heapAddress;
792 		} else {
793 			print_debugger_command_usage(argv[0]);
794 			return 0;
795 		}
796 	}
797 
798 	sCallerInfoCount = 0;
799 
800 	if (heap != NULL) {
801 		if (!analyze_allocation_callers(heap))
802 			return 0;
803 	} else {
804 		for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
805 			if (!analyze_allocation_callers(sHeaps[heapIndex]))
806 				return 0;
807 		}
808 	}
809 
810 	// sort the array
811 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
812 		sortBySize ? &caller_info_compare_size : &caller_info_compare_count);
813 
814 	kprintf("%ld different callers, sorted by %s...\n\n", sCallerInfoCount,
815 		sortBySize ? "size" : "count");
816 
817 	kprintf("     count        size      caller\n");
818 	kprintf("----------------------------------\n");
819 	for (int32 i = 0; i < sCallerInfoCount; i++) {
820 		caller_info& info = sCallerInfoTable[i];
821 		kprintf("%10ld  %10ld  %#08lx", info.count, info.size, info.caller);
822 
823 		const char *symbol;
824 		const char *imageName;
825 		bool exactMatch;
826 		addr_t baseAddress;
827 
828 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
829 				&imageName, &exactMatch) == B_OK) {
830 			kprintf("  %s + 0x%lx (%s)%s\n", symbol,
831 				info.caller - baseAddress, imageName,
832 				exactMatch ? "" : " (nearest)");
833 		} else
834 			kprintf("\n");
835 	}
836 
837 	return 0;
838 }
839 
840 #endif // KERNEL_HEAP_LEAK_CHECK
841 
842 
843 #if PARANOID_HEAP_VALIDATION
844 static void
845 heap_validate_heap(heap_allocator *heap)
846 {
847 	ReadLocker areaReadLocker(heap->area_lock);
848 	for (uint32 i = 0; i < heap->bin_count; i++)
849 		mutex_lock(&heap->bins[i].lock);
850 	MutexLocker pageLocker(heap->page_lock);
851 
852 	uint32 totalPageCount = 0;
853 	uint32 totalFreePageCount = 0;
854 	heap_area *area = heap->all_areas;
855 	while (area != NULL) {
856 		// validate the free pages list
857 		uint32 freePageCount = 0;
858 		heap_page *lastPage = NULL;
859 		heap_page *page = area->free_pages;
860 		while (page) {
861 			if ((addr_t)page < (addr_t)&area->page_table[0]
862 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
863 				panic("free page is not part of the page table\n");
864 
865 			if (page->index >= area->page_count)
866 				panic("free page has invalid index\n");
867 
868 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
869 				panic("free page index does not lead to target page\n");
870 
871 			if (page->prev != lastPage)
872 				panic("free page entry has invalid prev link\n");
873 
874 			if (page->in_use)
875 				panic("free page marked as in use\n");
876 
877 			lastPage = page;
878 			page = page->next;
879 			freePageCount++;
880 		}
881 
882 		totalPageCount += freePageCount;
883 		totalFreePageCount += freePageCount;
884 		if (area->free_page_count != freePageCount)
885 			panic("free page count doesn't match free page list\n");
886 
887 		// validate the page table
888 		uint32 usedPageCount = 0;
889 		for (uint32 i = 0; i < area->page_count; i++) {
890 			if (area->page_table[i].in_use)
891 				usedPageCount++;
892 		}
893 
894 		totalPageCount += usedPageCount;
895 		if (freePageCount + usedPageCount != area->page_count) {
896 			panic("free pages and used pages do not add up (%lu + %lu != %lu)\n",
897 				freePageCount, usedPageCount, area->page_count);
898 		}
899 
900 		area = area->all_next;
901 	}
902 
903 	// validate the areas
904 	area = heap->areas;
905 	heap_area *lastArea = NULL;
906 	uint32 lastFreeCount = 0;
907 	while (area != NULL) {
908 		if (area->free_page_count < lastFreeCount)
909 			panic("size ordering of area list broken\n");
910 
911 		if (area->prev != lastArea)
912 			panic("area list entry has invalid prev link\n");
913 
914 		lastArea = area;
915 		lastFreeCount = area->free_page_count;
916 		area = area->next;
917 	}
918 
919 	lastArea = NULL;
920 	area = heap->all_areas;
921 	while (area != NULL) {
922 		if (lastArea != NULL && lastArea->base < area->base)
923 			panic("base ordering of all_areas list broken\n");
924 
925 		lastArea = area;
926 		area = area->all_next;
927 	}
928 
929 	// validate the bins
930 	for (uint32 i = 0; i < heap->bin_count; i++) {
931 		heap_bin *bin = &heap->bins[i];
932 		heap_page *lastPage = NULL;
933 		heap_page *page = bin->page_list;
934 		lastFreeCount = 0;
935 		while (page) {
936 			area = heap->all_areas;
937 			while (area) {
938 				if (area == page->area)
939 					break;
940 				area = area->all_next;
941 			}
942 
943 			if (area == NULL) {
944 				panic("page area not present in area list\n");
945 				page = page->next;
946 				continue;
947 			}
948 
949 			if ((addr_t)page < (addr_t)&area->page_table[0]
950 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
951 				panic("used page is not part of the page table\n");
952 
953 			if (page->index >= area->page_count)
954 				panic("used page has invalid index\n");
955 
956 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
957 				panic("used page index does not lead to target page\n");
958 
959 			if (page->prev != lastPage) {
960 				panic("used page entry has invalid prev link (%p vs %p bin "
961 					"%lu)\n", page->prev, lastPage, i);
962 			}
963 
964 			if (!page->in_use)
965 				panic("used page marked as not in use\n");
966 
967 			if (page->bin_index != i) {
968 				panic("used page with bin index %u in page list of bin %lu\n",
969 					page->bin_index, i);
970 			}
971 
972 			if (page->free_count < lastFreeCount)
973 				panic("ordering of bin page list broken\n");
974 
975 			// validate the free list
976 			uint32 freeSlotsCount = 0;
977 			addr_t *element = page->free_list;
978 			addr_t pageBase = area->base + page->index * heap->page_size;
979 			while (element) {
980 				if ((addr_t)element < pageBase
981 					|| (addr_t)element >= pageBase + heap->page_size)
982 					panic("free list entry out of page range\n");
983 
984 				if (((addr_t)element - pageBase) % bin->element_size != 0)
985 					panic("free list entry not on an element boundary\n");
986 
987 				element = (addr_t *)*element;
988 				freeSlotsCount++;
989 			}
990 
991 			uint32 slotCount = bin->max_free_count;
992 			if (page->empty_index > slotCount) {
993 				panic("empty index beyond slot count (%u with %lu slots)\n",
994 					page->empty_index, slotCount);
995 			}
996 
997 			freeSlotsCount += (slotCount - page->empty_index);
998 			if (freeSlotsCount > slotCount)
999 				panic("more free slots than fit into the page\n");
1000 
1001 			lastPage = page;
1002 			lastFreeCount = page->free_count;
1003 			page = page->next;
1004 		}
1005 	}
1006 
1007 	pageLocker.Unlock();
1008 	for (uint32 i = 0; i < heap->bin_count; i++)
1009 		mutex_unlock(&heap->bins[i].lock);
1010 	areaReadLocker.Unlock();
1011 }
1012 #endif // PARANOID_HEAP_VALIDATION
1013 
1014 
1015 // #pragma mark - Heap functions
1016 
1017 
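// Adds the memory at [base, base + size) to the heap as a new area. The start
// of the range holds the heap_area structure and its page table; the remainder,
// rounded to B_PAGE_SIZE, becomes the usable pages, which initially all sit on
// the area's free page list.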
1018 void
1019 heap_add_area(heap_allocator *heap, area_id areaID, addr_t base, size_t size)
1020 {
1021 	heap_area *area = (heap_area *)base;
1022 	area->area = areaID;
1023 
1024 	base += sizeof(heap_area);
1025 	size -= sizeof(heap_area);
1026 
1027 	uint32 pageCount = size / heap->page_size;
1028 	size_t pageTableSize = pageCount * sizeof(heap_page);
1029 	area->page_table = (heap_page *)base;
1030 	base += pageTableSize;
1031 	size -= pageTableSize;
1032 
1033 	// the rest is now actually usable memory (rounded to the next page)
1034 	area->base = ROUNDUP(base, B_PAGE_SIZE);
1035 	area->size = size & ~(B_PAGE_SIZE - 1);
1036 
1037 	// now we know the real page count
1038 	pageCount = area->size / heap->page_size;
1039 	area->page_count = pageCount;
1040 
1041 	// zero out the page table and fill in page indexes
1042 	memset((void *)area->page_table, 0, pageTableSize);
1043 	for (uint32 i = 0; i < pageCount; i++) {
1044 		area->page_table[i].area = area;
1045 		area->page_table[i].index = i;
1046 	}
1047 
1048 	// link all the pages into the free pages list
1049 	for (uint32 i = 1; i < pageCount; i++) {
1050 		area->page_table[i - 1].next = &area->page_table[i];
1051 		area->page_table[i].prev = &area->page_table[i - 1];
1052 	}
1053 	area->free_pages = &area->page_table[0];
1054 	area->free_page_count = pageCount;
1055 	area->page_table[0].prev = NULL;
1056 	area->next = NULL;
1057 
1058 	WriteLocker areaWriteLocker(heap->area_lock);
1059 	MutexLocker pageLocker(heap->page_lock);
1060 	if (heap->areas == NULL) {
1061 		// it's the only (empty) area in that heap
1062 		area->prev = NULL;
1063 		heap->areas = area;
1064 	} else {
1065 		// link in this area as the last one as it is completely empty
1066 		heap_area *lastArea = heap->areas;
1067 		while (lastArea->next != NULL)
1068 			lastArea = lastArea->next;
1069 
1070 		lastArea->next = area;
1071 		area->prev = lastArea;
1072 	}
1073 
1074 	// insert this area in the all_areas list so it stays ordered by base
1075 	if (heap->all_areas == NULL || heap->all_areas->base < area->base) {
1076 		area->all_next = heap->all_areas;
1077 		heap->all_areas = area;
1078 	} else {
1079 		heap_area *insert = heap->all_areas;
1080 		while (insert->all_next && insert->all_next->base > area->base)
1081 			insert = insert->all_next;
1082 
1083 		area->all_next = insert->all_next;
1084 		insert->all_next = area;
1085 	}
1086 
1087 	heap->total_pages += area->page_count;
1088 	heap->total_free_pages += area->free_page_count;
1089 
1090 	if (areaID >= 0) {
1091 		// this area can be deleted later and is still empty - the empty count
1092 		// will be decremented as soon as the area is used for the first time
1093 		heap->empty_areas++;
1094 	}
1095 
1096 	pageLocker.Unlock();
1097 	areaWriteLocker.Unlock();
1098 
1099 	dprintf("heap_add_area: area %ld added to %s heap %p - usable range 0x%08lx "
1100 		"- 0x%08lx\n", area->area, heap->name, heap, area->base,
1101 		area->base + area->size);
1102 }
1103 
1104 
1105 static status_t
1106 heap_remove_area(heap_allocator *heap, heap_area *area)
1107 {
1108 	if (area->free_page_count != area->page_count) {
1109 		panic("tried removing heap area that still has pages in use");
1110 		return B_ERROR;
1111 	}
1112 
1113 	if (area->prev == NULL && area->next == NULL) {
1114 		panic("tried removing the last non-full heap area");
1115 		return B_ERROR;
1116 	}
1117 
1118 	if (heap->areas == area)
1119 		heap->areas = area->next;
1120 	if (area->prev != NULL)
1121 		area->prev->next = area->next;
1122 	if (area->next != NULL)
1123 		area->next->prev = area->prev;
1124 
1125 	if (heap->all_areas == area)
1126 		heap->all_areas = area->all_next;
1127 	else {
1128 		heap_area *previous = heap->all_areas;
1129 		while (previous) {
1130 			if (previous->all_next == area) {
1131 				previous->all_next = area->all_next;
1132 				break;
1133 			}
1134 
1135 			previous = previous->all_next;
1136 		}
1137 
1138 		if (previous == NULL)
1139 			panic("removing heap area that is not in all list");
1140 	}
1141 
1142 	heap->total_pages -= area->page_count;
1143 	heap->total_free_pages -= area->free_page_count;
1144 
1145 	dprintf("heap_remove_area: area %ld with range 0x%08lx - 0x%08lx removed "
1146 		"from %s heap %p\n", area->area, area->base, area->base + area->size,
1147 		heap->name, heap);
1148 
1149 	return B_OK;
1150 }
1151 
1152 
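// Sets up a heap allocator for the given heap class. The bin sizes are derived
// from the class: for each candidate element count per page the element size is
// rounded down to the class' bin_alignment, and counts that would waste more
// than max_waste_per_page bytes of a page are skipped. The remainder of the
// given range (after the allocator structures, unless allocateOnHeap is set) is
// handed to heap_add_area() as the initial area.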
1153 heap_allocator *
1154 heap_create_allocator(const char *name, addr_t base, size_t size,
1155 	const heap_class *heapClass, bool allocateOnHeap)
1156 {
1157 	heap_allocator *heap;
1158 	if (allocateOnHeap) {
1159 		// allocate separately on the heap
1160 		heap = (heap_allocator *)malloc(sizeof(heap_allocator)
1161 			+ sizeof(heap_bin) * MAX_BIN_COUNT);
1162 	} else {
1163 		// use up the first part of the area
1164 		heap = (heap_allocator *)base;
1165 		base += sizeof(heap_allocator);
1166 		size -= sizeof(heap_allocator);
1167 	}
1168 
1169 	heap->name = name;
1170 	heap->page_size = heapClass->page_size;
1171 	heap->total_pages = heap->total_free_pages = heap->empty_areas = 0;
1172 	heap->areas = heap->all_areas = NULL;
1173 	heap->bins = (heap_bin *)((addr_t)heap + sizeof(heap_allocator));
1174 
1175 #if KERNEL_HEAP_LEAK_CHECK
1176 	heap->get_caller = &get_caller;
1177 #endif
1178 
1179 	heap->bin_count = 0;
1180 	size_t binSize = 0, lastSize = 0;
1181 	uint32 count = heap->page_size / heapClass->min_bin_size;
1182 	for (; count >= heapClass->min_count_per_page; count--, lastSize = binSize) {
1183 		if (heap->bin_count >= MAX_BIN_COUNT)
1184 			panic("heap configuration invalid - max bin count reached\n");
1185 
1186 		binSize = (heap->page_size / count) & ~(heapClass->bin_alignment - 1);
1187 		if (binSize == lastSize)
1188 			continue;
1189 		if (heap->page_size - count * binSize > heapClass->max_waste_per_page)
1190 			continue;
1191 
1192 		heap_bin *bin = &heap->bins[heap->bin_count];
1193 		mutex_init(&bin->lock, "heap bin lock");
1194 		bin->element_size = binSize;
1195 		bin->max_free_count = heap->page_size / binSize;
1196 		bin->page_list = NULL;
1197 		heap->bin_count++;
1198 	};
1199 
1200 	if (!allocateOnHeap) {
1201 		base += heap->bin_count * sizeof(heap_bin);
1202 		size -= heap->bin_count * sizeof(heap_bin);
1203 	}
1204 
1205 	rw_lock_init(&heap->area_lock, "heap area rw lock");
1206 	mutex_init(&heap->page_lock, "heap page lock");
1207 
1208 	heap_add_area(heap, -1, base, size);
1209 	return heap;
1210 }
1211 
1212 
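// The heap->areas list only contains areas that still have free pages and is
// kept sorted by ascending free_page_count, so that allocations prefer the most
// used area. The two helpers below maintain this ordering whenever pages are
// returned to or taken from an area.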
1213 static inline void
1214 heap_free_pages_added(heap_allocator *heap, heap_area *area, uint32 pageCount)
1215 {
1216 	area->free_page_count += pageCount;
1217 	heap->total_free_pages += pageCount;
1218 
1219 	if (area->free_page_count == pageCount) {
1220 		// we need to add ourselves to the area list of the heap
1221 		area->prev = NULL;
1222 		area->next = heap->areas;
1223 		if (area->next)
1224 			area->next->prev = area;
1225 		heap->areas = area;
1226 	} else {
1227 		// we might need to move back in the area list
1228 		if (area->next && area->next->free_page_count < area->free_page_count) {
1229 			// move ourselves so the list stays ordered
1230 			heap_area *insert = area->next;
1231 			while (insert->next
1232 				&& insert->next->free_page_count < area->free_page_count)
1233 				insert = insert->next;
1234 
1235 			if (area->prev)
1236 				area->prev->next = area->next;
1237 			if (area->next)
1238 				area->next->prev = area->prev;
1239 			if (heap->areas == area)
1240 				heap->areas = area->next;
1241 
1242 			area->prev = insert;
1243 			area->next = insert->next;
1244 			if (area->next)
1245 				area->next->prev = area;
1246 			insert->next = area;
1247 		}
1248 	}
1249 
1250 	if (area->free_page_count == area->page_count && area->area >= 0)
1251 		heap->empty_areas++;
1252 }
1253 
1254 
1255 static inline void
1256 heap_free_pages_removed(heap_allocator *heap, heap_area *area, uint32 pageCount)
1257 {
1258 	if (area->free_page_count == area->page_count && area->area >= 0) {
1259 		// this area was completely empty
1260 		heap->empty_areas--;
1261 	}
1262 
1263 	area->free_page_count -= pageCount;
1264 	heap->total_free_pages -= pageCount;
1265 
1266 	if (area->free_page_count == 0) {
1267 		// the area is now full so we remove it from the area list
1268 		if (area->prev)
1269 			area->prev->next = area->next;
1270 		if (area->next)
1271 			area->next->prev = area->prev;
1272 		if (heap->areas == area)
1273 			heap->areas = area->next;
1274 		area->next = area->prev = NULL;
1275 	} else {
1276 		// we might need to move forward in the area list
1277 		if (area->prev && area->prev->free_page_count > area->free_page_count) {
1278 			// move ourselves so the list stays ordered
1279 			heap_area *insert = area->prev;
1280 			while (insert->prev
1281 				&& insert->prev->free_page_count > area->free_page_count)
1282 				insert = insert->prev;
1283 
1284 			if (area->prev)
1285 				area->prev->next = area->next;
1286 			if (area->next)
1287 				area->next->prev = area->prev;
1288 
1289 			area->prev = insert->prev;
1290 			area->next = insert;
1291 			if (area->prev)
1292 				area->prev->next = area;
1293 			if (heap->areas == insert)
1294 				heap->areas = area;
1295 			insert->prev = area;
1296 		}
1297 	}
1298 }
1299 
1300 
1301 static inline void
1302 heap_link_page(heap_page *page, heap_page **list)
1303 {
1304 	page->prev = NULL;
1305 	page->next = *list;
1306 	if (page->next)
1307 		page->next->prev = page;
1308 	*list = page;
1309 }
1310 
1311 
1312 static inline void
1313 heap_unlink_page(heap_page *page, heap_page **list)
1314 {
1315 	if (page->prev)
1316 		page->prev->next = page->next;
1317 	if (page->next)
1318 		page->next->prev = page->prev;
1319 	if (list && *list == page) {
1320 		*list = page->next;
1321 		if (page->next)
1322 			page->next->prev = NULL;
1323 	}
1324 }
1325 
1326 
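// Searches the areas for a run of pageCount consecutive unused pages. If the
// requested alignment is larger than the heap's page size, only suitably
// aligned start indexes are considered. The pages of a found run are marked in
// use, tagged with a common allocation_id (the index of the first page) and
// removed from the area's free page list; returns NULL if no such run exists.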
1327 static heap_page *
1328 heap_allocate_contiguous_pages(heap_allocator *heap, uint32 pageCount,
1329 	size_t alignment)
1330 {
1331 	MutexLocker pageLocker(heap->page_lock);
1332 	heap_area *area = heap->areas;
1333 	while (area) {
1334 		if (area->free_page_count < pageCount) {
1335 			area = area->next;
1336 			continue;
1337 		}
1338 
1339 		uint32 step = 1;
1340 		uint32 firstValid = 0;
1341 		const uint32 lastValid = area->page_count - pageCount + 1;
1342 
1343 		if (alignment > heap->page_size) {
1344 			firstValid = (ROUNDUP(area->base, alignment) - area->base)
1345 				/ heap->page_size;
1346 			step = alignment / heap->page_size;
1347 		}
1348 
1349 		int32 first = -1;
1350 		for (uint32 i = firstValid; i < lastValid; i += step) {
1351 			if (area->page_table[i].in_use)
1352 				continue;
1353 
1354 			first = i;
1355 
1356 			for (uint32 j = 1; j < pageCount; j++) {
1357 				if (area->page_table[i + j].in_use) {
1358 					first = -1;
1359 					i += j / step * step;
1360 					break;
1361 				}
1362 			}
1363 
1364 			if (first >= 0)
1365 				break;
1366 		}
1367 
1368 		if (first < 0) {
1369 			area = area->next;
1370 			continue;
1371 		}
1372 
1373 		for (uint32 i = first; i < first + pageCount; i++) {
1374 			heap_page *page = &area->page_table[i];
1375 			page->in_use = 1;
1376 			page->bin_index = heap->bin_count;
1377 
1378 			heap_unlink_page(page, &area->free_pages);
1379 
1380 			page->next = page->prev = NULL;
1381 			page->free_list = NULL;
1382 			page->allocation_id = (uint16)first;
1383 		}
1384 
1385 		heap_free_pages_removed(heap, area, pageCount);
1386 		return &area->page_table[first];
1387 	}
1388 
1389 	return NULL;
1390 }
1391 
1392 
1393 #if KERNEL_HEAP_LEAK_CHECK
1394 static void
1395 heap_add_leak_check_info(heap_allocator *heap, addr_t address, size_t allocated,
1396 	size_t size)
1397 {
1398 	heap_leak_check_info *info = (heap_leak_check_info *)(address + allocated
1399 		- sizeof(heap_leak_check_info));
1400 	info->size = size - sizeof(heap_leak_check_info);
1401 	info->thread = (gKernelStartup ? 0 : thread_get_current_thread_id());
1402 	info->team = (gKernelStartup ? 0 : team_get_current_team_id());
1403 	info->caller = heap->get_caller();
1404 }
1405 #endif
1406 
1407 
1408 static void *
1409 heap_raw_alloc(heap_allocator *heap, size_t size, size_t alignment)
1410 {
1411 	TRACE(("heap %p: allocate %lu bytes from raw pages with alignment %lu\n",
1412 		heap, size, alignment));
1413 
1414 	uint32 pageCount = (size + heap->page_size - 1) / heap->page_size;
1415 	heap_page *firstPage = heap_allocate_contiguous_pages(heap, pageCount,
1416 		alignment);
1417 	if (firstPage == NULL) {
1418 		TRACE(("heap %p: found no contiguous pages to allocate %ld bytes\n",
1419 			heap, size));
1420 		return NULL;
1421 	}
1422 
1423 	addr_t address = firstPage->area->base + firstPage->index * heap->page_size;
1424 #if KERNEL_HEAP_LEAK_CHECK
1425 	heap_add_leak_check_info(heap, address, pageCount * heap->page_size, size);
1426 #endif
1427 	return (void *)address;
1428 }
1429 
1430 
1431 static void *
1432 heap_allocate_from_bin(heap_allocator *heap, uint32 binIndex, size_t size)
1433 {
1434 	heap_bin *bin = &heap->bins[binIndex];
1435 	TRACE(("heap %p: allocate %lu bytes from bin %lu with element_size %lu\n",
1436 		heap, size, binIndex, bin->element_size));
1437 
1438 	MutexLocker binLocker(bin->lock);
1439 	heap_page *page = bin->page_list;
1440 	if (page == NULL) {
1441 		MutexLocker pageLocker(heap->page_lock);
1442 		heap_area *area = heap->areas;
1443 		if (area == NULL) {
1444 			TRACE(("heap %p: no free pages to allocate %lu bytes\n", heap,
1445 				size));
1446 			return NULL;
1447 		}
1448 
1449 		// by design there are only areas in the list that still have
1450 		// free pages available
1451 		page = area->free_pages;
1452 		area->free_pages = page->next;
1453 		if (page->next)
1454 			page->next->prev = NULL;
1455 
1456 		heap_free_pages_removed(heap, area, 1);
1457 
1458 		if (page->in_use)
1459 			panic("got an in use page %p from the free pages list\n", page);
1460 		page->in_use = 1;
1461 
1462 		pageLocker.Unlock();
1463 
1464 		page->bin_index = binIndex;
1465 		page->free_count = bin->max_free_count;
1466 		page->empty_index = 0;
1467 		page->free_list = NULL;
1468 		page->next = page->prev = NULL;
1469 		bin->page_list = page;
1470 	}
1471 
1472 	// we have a page where we have a free slot
1473 	void *address = NULL;
1474 	if (page->free_list) {
1475 		// there's a previously freed entry we can use
1476 		address = page->free_list;
1477 		page->free_list = (addr_t *)*page->free_list;
1478 	} else {
1479 		// the page hasn't been fully allocated so use the next empty_index
1480 		address = (void *)(page->area->base + page->index * heap->page_size
1481 			+ page->empty_index * bin->element_size);
1482 		page->empty_index++;
1483 	}
1484 
1485 	page->free_count--;
1486 	if (page->free_count == 0) {
1487 		// the page is now full so we remove it from the page_list
1488 		bin->page_list = page->next;
1489 		if (page->next)
1490 			page->next->prev = NULL;
1491 		page->next = page->prev = NULL;
1492 	}
1493 
1494 #if KERNEL_HEAP_LEAK_CHECK
1495 	binLocker.Unlock();
1496 	heap_add_leak_check_info(heap, (addr_t)address, bin->element_size, size);
1497 #endif
1498 	return address;
1499 }
1500 
1501 
1502 static bool
1503 is_valid_alignment(size_t number)
1504 {
1505 	// this cryptic line accepts zero and all powers of two
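	// (for a power of two 1 << k, ~number + 1 == -number has all bits >= k set
	// and (number << 1) - 1 has all bits <= k set, so ORed they cover every
	// bit; e.g. 8: ...11111000 | 00001111 == ~0, while 6: ...11111010
	// | 00001011 leaves bit 2 clear)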
1506 	return ((~number + 1) | ((number << 1) - 1)) == ~0UL;
1507 }
1508 
1509 
1510 inline bool
1511 heap_should_grow(heap_allocator *heap)
1512 {
1513 	// suggest growing if there is less than 20% of a grow size available
1514 	return heap->total_free_pages * heap->page_size < HEAP_GROW_SIZE / 5;
1515 }
1516 
1517 
1518 void *
1519 heap_memalign(heap_allocator *heap, size_t alignment, size_t size)
1520 {
1521 	TRACE(("memalign(alignment = %lu, size = %lu)\n", alignment, size));
1522 
1523 #if DEBUG
1524 	if (!is_valid_alignment(alignment))
1525 		panic("memalign() with an alignment which is not a power of 2\n");
1526 #endif
1527 
1528 #if KERNEL_HEAP_LEAK_CHECK
1529 	size += sizeof(heap_leak_check_info);
1530 #endif
1531 
1532 	void *address = NULL;
1533 	if (alignment < B_PAGE_SIZE) {
1534 		if (alignment != 0) {
1535 			// TODO: The alignment is done by ensuring that the element size
1536 			// of the target bin is aligned with the requested alignment. This
1537 			// has the problem that it wastes space because a better (smaller)
1538 			// bin could possibly be selected. We should pick the best bin and
1539 			// check if there is an aligned block in the free list or if a new
1540 			// (page aligned) page has to be allocated anyway.
1541 			size = ROUNDUP(size, alignment);
1542 			for (uint32 i = 0; i < heap->bin_count; i++) {
1543 				if (size <= heap->bins[i].element_size
1544 					&& is_valid_alignment(heap->bins[i].element_size)) {
1545 					address = heap_allocate_from_bin(heap, i, size);
1546 					break;
1547 				}
1548 			}
1549 		} else {
1550 			for (uint32 i = 0; i < heap->bin_count; i++) {
1551 				if (size <= heap->bins[i].element_size) {
1552 					address = heap_allocate_from_bin(heap, i, size);
1553 					break;
1554 				}
1555 			}
1556 		}
1557 	}
1558 
1559 	if (address == NULL)
1560 		address = heap_raw_alloc(heap, size, alignment);
1561 
1562 #if KERNEL_HEAP_LEAK_CHECK
1563 	size -= sizeof(heap_leak_check_info);
1564 #endif
1565 
1566 	TRACE(("memalign(): asked to allocate %lu bytes, returning pointer %p\n",
1567 		size, address));
1568 
1569 	T(Allocate((addr_t)address, size));
1570 	if (address == NULL)
1571 		return address;
1572 
1573 #if PARANOID_KERNEL_MALLOC
1574 	memset(address, 0xcc, size);
1575 #endif
1576 
1577 #if PARANOID_KERNEL_FREE
1578 	// make sure 0xdeadbeef is cleared if we do not overwrite the memory
1579 	// and the user does not clear it
1580 	if (((uint32 *)address)[1] == 0xdeadbeef)
1581 		((uint32 *)address)[1] = 0xcccccccc;
1582 #endif
1583 
1584 	return address;
1585 }
1586 
1587 
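// Returns the given allocation to the heap, or B_ENTRY_NOT_FOUND if the address
// does not lie within any of this heap's areas. If the free leaves more than
// one completely empty, deletable area behind, the surplus empty areas are
// removed from the heap and their underlying areas deleted.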
1588 status_t
1589 heap_free(heap_allocator *heap, void *address)
1590 {
1591 	if (address == NULL)
1592 		return B_OK;
1593 
1594 	ReadLocker areaReadLocker(heap->area_lock);
1595 	heap_area *area = heap->all_areas;
1596 	while (area) {
1597 		// since the all_areas list is ordered by base with the biggest
1598 		// base at the top, we need only find the first area with a base
1599 		// not larger than our address to become our only candidate for freeing
1600 		if (area->base <= (addr_t)address) {
1601 			if ((addr_t)address >= area->base + area->size) {
1602 				// none of the other areas can contain the address as the list
1603 				// is ordered
1604 				return B_ENTRY_NOT_FOUND;
1605 			}
1606 
1607 			// this area contains the allocation, we're done searching
1608 			break;
1609 		}
1610 
1611 		area = area->all_next;
1612 	}
1613 
1614 	if (area == NULL) {
1615 		// this address does not belong to us
1616 		return B_ENTRY_NOT_FOUND;
1617 	}
1618 
1619 	TRACE(("free(): asked to free pointer %p\n", address));
1620 
1621 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1622 		/ heap->page_size];
1623 
1624 	TRACE(("free(): page %p: bin_index %d, free_count %d\n", page,
1625 		page->bin_index, page->free_count));
1626 
1627 	if (page->bin_index > heap->bin_count) {
1628 		panic("free(): page %p: invalid bin_index %d\n", page, page->bin_index);
1629 		return B_ERROR;
1630 	}
1631 
1632 	if (page->bin_index < heap->bin_count) {
1633 		// small allocation
1634 		heap_bin *bin = &heap->bins[page->bin_index];
1635 
1636 #if PARANOID_KERNEL_FREE
1637 		if (((uint32 *)address)[1] == 0xdeadbeef) {
1638 			// This block looks like it was freed already, walk the free list
1639 			// on this page to make sure this address doesn't exist.
1640 			MutexLocker binLocker(bin->lock);
1641 			for (addr_t *temp = page->free_list; temp != NULL;
1642 					temp = (addr_t *)*temp) {
1643 				if (temp == address) {
1644 					panic("free(): address %p already exists in page free "
1645 						"list\n", address);
1646 					return B_ERROR;
1647 				}
1648 			}
1649 		}
1650 
1651 		// the first 4 bytes are overwritten with the next free list pointer
1652 		// later
1653 		uint32 *dead = (uint32 *)address;
1654 		for (uint32 i = 1; i < bin->element_size / sizeof(uint32); i++)
1655 			dead[i] = 0xdeadbeef;
1656 #endif
1657 
1658 		MutexLocker binLocker(bin->lock);
1659 		if (((addr_t)address - area->base - page->index
1660 			* heap->page_size) % bin->element_size != 0) {
1661 			panic("free(): passed invalid pointer %p supposed to be in bin for "
1662 				"element size %ld\n", address, bin->element_size);
1663 			return B_ERROR;
1664 		}
1665 
1666 		// add the address to the page free list
1667 		*(addr_t *)address = (addr_t)page->free_list;
1668 		page->free_list = (addr_t *)address;
1669 		page->free_count++;
1670 
1671 		if (page->free_count == bin->max_free_count) {
1672 			// we are now empty, remove the page from the bin list
1673 			MutexLocker pageLocker(heap->page_lock);
1674 			heap_unlink_page(page, &bin->page_list);
1675 			page->in_use = 0;
1676 			heap_link_page(page, &area->free_pages);
1677 			heap_free_pages_added(heap, area, 1);
1678 		} else if (page->free_count == 1) {
1679 			// we need to add ourselves to the page list of the bin
1680 			heap_link_page(page, &bin->page_list);
1681 		} else {
1682 			// we might need to move back in the bin's page list
1683 			if (page->next && page->next->free_count < page->free_count) {
1684 				// move ourselves so the list stays ordered
1685 				heap_page *insert = page->next;
1686 				while (insert->next
1687 					&& insert->next->free_count < page->free_count)
1688 					insert = insert->next;
1689 
1690 				heap_unlink_page(page, &bin->page_list);
1691 
1692 				page->prev = insert;
1693 				page->next = insert->next;
1694 				if (page->next)
1695 					page->next->prev = page;
1696 				insert->next = page;
1697 			}
1698 		}
1699 	} else {
1700 		// large allocation, just return the pages to the page free list
1701 		uint32 allocationID = page->allocation_id;
1702 		uint32 maxPages = area->page_count - page->index;
1703 		uint32 pageCount = 0;
1704 
1705 		MutexLocker pageLocker(heap->page_lock);
1706 		for (uint32 i = 0; i < maxPages; i++) {
1707 			// loop until we find the end of this allocation
1708 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1709 				|| page[i].allocation_id != allocationID)
1710 				break;
1711 
1712 			// this page still belongs to the same allocation
1713 			page[i].in_use = 0;
1714 			page[i].allocation_id = 0;
1715 
1716 			// return it to the free list
1717 			heap_link_page(&page[i], &area->free_pages);
1718 			pageCount++;
1719 		}
1720 
1721 		heap_free_pages_added(heap, area, pageCount);
1722 	}
1723 
1724 	T(Free((addr_t)address));
1725 	areaReadLocker.Unlock();
1726 
1727 	if (heap->empty_areas > 1) {
1728 		WriteLocker areaWriteLocker(heap->area_lock);
1729 		MutexLocker pageLocker(heap->page_lock);
1730 
1731 		area_id areasToDelete[heap->empty_areas - 1];
1732 		int32 areasToDeleteIndex = 0;
1733 
1734 		area = heap->areas;
1735 		while (area != NULL && heap->empty_areas > 1) {
1736 			heap_area *next = area->next;
1737 			if (area->area >= 0
1738 				&& area->free_page_count == area->page_count
1739 				&& heap_remove_area(heap, area) == B_OK) {
1740 				areasToDelete[areasToDeleteIndex++] = area->area;
1741 				heap->empty_areas--;
1742 			}
1743 
1744 			area = next;
1745 		}
1746 
1747 		pageLocker.Unlock();
1748 		areaWriteLocker.Unlock();
1749 
1750 		for (int32 i = 0; i < areasToDeleteIndex; i++)
1751 			delete_area(areasToDelete[i]);
1752 	}
1753 
1754 	return B_OK;
1755 }
1756 
1757 
1758 #if KERNEL_HEAP_LEAK_CHECK
1759 extern "C" void
1760 heap_set_get_caller(heap_allocator* heap, addr_t (*getCaller)())
1761 {
1762 	heap->get_caller = getCaller;
1763 }
1764 #endif
1765 
1766 
1767 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
1768 
1769 
1770 static status_t
1771 heap_realloc(heap_allocator *heap, void *address, void **newAddress,
1772 	size_t newSize)
1773 {
1774 	ReadLocker areaReadLocker(heap->area_lock);
1775 	heap_area *area = heap->all_areas;
1776 	while (area) {
1777 		// since the all_areas list is ordered by base with the biggest
1778 		// base at the top, we need only find the first area with a base
1779 		// not larger than our address to become our only candidate for
1780 		// reallocating
1781 		if (area->base <= (addr_t)address) {
1782 			if ((addr_t)address >= area->base + area->size) {
1783 				// none of the other areas can contain the address as the list
1784 				// is ordered
1785 				return B_ENTRY_NOT_FOUND;
1786 			}
1787 
1788 			// this area contains the allocation, we're done searching
1789 			break;
1790 		}
1791 
1792 		area = area->all_next;
1793 	}
1794 
1795 	if (area == NULL) {
1796 		// this address does not belong to us
1797 		return B_ENTRY_NOT_FOUND;
1798 	}
1799 
1800 	TRACE(("realloc(address = %p, newSize = %lu)\n", address, newSize));
1801 
1802 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1803 		/ heap->page_size];
1804 	if (page->bin_index > heap->bin_count) {
1805 		panic("realloc(): page %p: invalid bin_index %d\n", page,
1806 			page->bin_index);
1807 		return B_ERROR;
1808 	}
1809 
1810 	// find out the size of the old allocation first
1811 	size_t minSize = 0;
1812 	size_t maxSize = 0;
1813 	if (page->bin_index < heap->bin_count) {
1814 		// this was a small allocation
1815 		heap_bin *bin = &heap->bins[page->bin_index];
1816 		maxSize = bin->element_size;
1817 		if (page->bin_index > 0)
1818 			minSize = heap->bins[page->bin_index - 1].element_size + 1;
1819 	} else {
1820 		// this was a large allocation
1821 		uint32 allocationID = page->allocation_id;
1822 		uint32 maxPages = area->page_count - page->index;
1823 		maxSize = heap->page_size;
1824 
1825 		MutexLocker pageLocker(heap->page_lock);
1826 		for (uint32 i = 1; i < maxPages; i++) {
1827 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1828 				|| page[i].allocation_id != allocationID)
1829 				break;
1830 
1831 			minSize += heap->page_size;
1832 			maxSize += heap->page_size;
1833 		}
1834 	}
1835 
1836 	areaReadLocker.Unlock();
1837 
1838 #if KERNEL_HEAP_LEAK_CHECK
1839 	newSize += sizeof(heap_leak_check_info);
1840 #endif
1841 
1842 	// does the new allocation simply fit in the old allocation?
1843 	if (newSize > minSize && newSize <= maxSize) {
1844 #if KERNEL_HEAP_LEAK_CHECK
1845 		// update the size info (the info is at the end so stays where it is)
1846 		heap_leak_check_info *info = (heap_leak_check_info *)((addr_t)address
1847 			+ maxSize - sizeof(heap_leak_check_info));
1848 		info->size = newSize - sizeof(heap_leak_check_info);
1849 		newSize -= sizeof(heap_leak_check_info);
1850 #endif
1851 
1852 		T(Reallocate((addr_t)address, (addr_t)address, newSize));
1853 		*newAddress = address;
1854 		return B_OK;
1855 	}
1856 
1857 #if KERNEL_HEAP_LEAK_CHECK
1858 	// new leak check info will be created with the malloc below
1859 	newSize -= sizeof(heap_leak_check_info);
1860 #endif
1861 
1862 	// if not, allocate a new chunk of memory
1863 	*newAddress = memalign(0, newSize);
1864 	T(Reallocate((addr_t)address, (addr_t)*newAddress, newSize));
1865 	if (*newAddress == NULL) {
1866 		// we could not allocate a new chunk, but the request was still handled
1867 		return B_OK;
1868 	}
1869 
1870 	// copy the old data and free the old allocation
1871 	memcpy(*newAddress, address, min_c(maxSize, newSize));
1872 	heap_free(heap, address);
1873 	return B_OK;
1874 }
1875 
1876 
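// Maps an allocation size and a CPU number to an index into sHeaps: the heap
// class is chosen by size, then offset by the CPU's block of HEAP_CLASS_COUNT
// heaps, modulo the number of heaps actually created.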
1877 inline uint32
1878 heap_index_for(size_t size, int32 cpu)
1879 {
1880 #if KERNEL_HEAP_LEAK_CHECK
1881 	// take the extra info size into account
1882 	size += sizeof(heap_leak_check_info_s);
1883 #endif
1884 
1885 	uint32 index = 0;
1886 	for (; index < HEAP_CLASS_COUNT - 1; index++) {
1887 		if (size <= sHeapClasses[index].max_allocation_size)
1888 			break;
1889 	}
1890 
1891 	return (index + cpu * HEAP_CLASS_COUNT) % sHeapCount;
1892 }
1893 
1894 
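// Allocates without waiting for the heap grower. The grow thread is served from
// the dedicated grow heap first (asynchronously requesting a new grow heap when
// it runs low); all callers then fall back to whatever the existing public
// heaps can still provide.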
1895 static void *
1896 memalign_nogrow(size_t alignment, size_t size)
1897 {
1898 	// use dedicated memory in the grow thread by default
1899 	if (thread_get_current_thread_id() == sHeapGrowThread) {
1900 		void *result = heap_memalign(sGrowHeap, alignment, size);
1901 		if (!sAddGrowHeap && heap_should_grow(sGrowHeap)) {
1902 			// hopefully the heap grower will manage to create a new heap
1903 			// before running out of private memory...
1904 			dprintf("heap: requesting new grow heap\n");
1905 			sAddGrowHeap = true;
1906 			release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
1907 		}
1908 
1909 		if (result != NULL)
1910 			return result;
1911 	}
1912 
1913 	// try public memory, there might be something available
1914 	void *result = NULL;
1915 	int32 cpuCount = MIN(smp_get_num_cpus(),
1916 		(int32)sHeapCount / HEAP_CLASS_COUNT);
1917 	int32 cpuNumber = smp_get_current_cpu();
1918 	for (int32 i = 0; i < cpuCount; i++) {
1919 		uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount);
1920 		heap_allocator *heap = sHeaps[heapIndex];
1921 		result = heap_memalign(heap, alignment, size);
1922 		if (result != NULL)
1923 			return result;
1924 	}
1925 
1926 	// no memory available
1927 	if (thread_get_current_thread_id() == sHeapGrowThread)
1928 		panic("heap: all heaps have run out of memory while growing\n");
1929 	else
1930 		dprintf("heap: all heaps have run out of memory\n");
1931 
1932 	return NULL;
1933 }
1934 
1935 
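// Creates a new kernel area of the given size and adds it to the heap as
// additional backing store.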
1936 static status_t
1937 heap_create_new_heap_area(heap_allocator *heap, const char *name, size_t size)
1938 {
1939 	void *address = NULL;
1940 	area_id heapArea = create_area(name, &address,
1941 		B_ANY_KERNEL_BLOCK_ADDRESS, size, B_FULL_LOCK,
1942 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1943 	if (heapArea < B_OK) {
1944 		TRACE(("heap: couldn't allocate heap area \"%s\"\n", name));
1945 		return heapArea;
1946 	}
1947 
1948 	heap_add_area(heap, heapArea, (addr_t)address, size);
1949 #if PARANOID_HEAP_VALIDATION
1950 	heap_validate_heap(heap);
1951 #endif
1952 	return B_OK;
1953 }
1954 
1955 
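// The dedicated heap grower thread: it waits on sHeapGrowSem, creates an
// additional grow heap area when requested via sAddGrowHeap, grows every heap
// that is nearly full or has a pending grow request, and finally wakes all
// threads blocked on sHeapGrownNotify.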
1956 static int32
1957 heap_grow_thread(void *)
1958 {
1959 	while (true) {
1960 		// wait for a request to grow the heap list
1961 		if (acquire_sem(sHeapGrowSem) < B_OK)
1962 			continue;
1963 
1964 		if (sAddGrowHeap) {
1965 			// the grow heap is going to run full soon, try to allocate a new
1966 			// one to make some room.
1967 			TRACE(("heap_grower: grow heaps will run out of memory soon\n"));
1968 			if (heap_create_new_heap_area(sGrowHeap, "additional grow heap",
1969 					HEAP_DEDICATED_GROW_SIZE) != B_OK)
1970 				dprintf("heap_grower: failed to create new grow heap area\n");
1971 		}
1972 
1973 		for (uint32 i = 0; i < sHeapCount; i++) {
1974 			heap_allocator *heap = sHeaps[i];
1975 			if (sLastGrowRequest[i] > sLastHandledGrowRequest[i]
1976 				|| heap_should_grow(heap)) {
1977 				// grow this heap if it is nearly full or if a grow was
1978 				// explicitly requested for this heap (happens when a large
1979 				// allocation cannot be fulfilled due to lack of contiguous
1980 				// pages)
1981 				if (heap_create_new_heap_area(heap, "additional heap",
1982 						HEAP_GROW_SIZE) != B_OK)
1983 					dprintf("heap_grower: failed to create new heap area\n");
1984 				sLastHandledGrowRequest[i] = sLastGrowRequest[i];
1985 			}
1986 		}
1987 
1988 		// notify anyone waiting for this request
1989 		release_sem_etc(sHeapGrownNotify, -1, B_RELEASE_ALL);
1990 	}
1991 
1992 	return 0;
1993 }
1994 
1995 
1996 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
1997 
1998 
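// Kernel daemon hook backing deferred_free() and deferred_delete(): the
// pending entries are moved to local lists while holding the spinlock and are
// then freed or deleted outside of the lock.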
1999 static void
2000 deferred_deleter(void *arg, int iteration)
2001 {
2002 	// move entries and deletables to on-stack lists
2003 	InterruptsSpinLocker locker(sDeferredFreeListLock);
2004 	if (sDeferredFreeList.IsEmpty() && sDeferredDeletableList.IsEmpty())
2005 		return;
2006 
2007 	DeferredFreeList entries;
2008 	entries.MoveFrom(&sDeferredFreeList);
2009 
2010 	DeferredDeletableList deletables;
2011 	deletables.MoveFrom(&sDeferredDeletableList);
2012 
2013 	locker.Unlock();
2014 
2015 	// free the entries
2016 	while (DeferredFreeListEntry* entry = entries.RemoveHead())
2017 		free(entry);
2018 
2019 	// delete the deletables
2020 	while (DeferredDeletable* deletable = deletables.RemoveHead())
2021 		delete deletable;
2022 }
2023 
2024 
2025 //	#pragma mark -
2026 
2027 
2028 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
2029 
2030 
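// Creates the initial heaps (one per heap class) in the given address range
// and registers the heap related debugger commands.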
2031 status_t
2032 heap_init(addr_t base, size_t size)
2033 {
2034 	for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) {
2035 		size_t partSize = size * sHeapClasses[i].initial_percentage / 100;
2036 		sHeaps[i] = heap_create_allocator(sHeapClasses[i].name, base, partSize,
2037 			&sHeapClasses[i], false);
2038 		sLastGrowRequest[i] = sLastHandledGrowRequest[i] = 0;
2039 		base += partSize;
2040 		sHeapCount++;
2041 	}
2042 
2043 	// set up some debug commands
2044 	add_debugger_command_etc("heap", &dump_heap_list,
2045 		"Dump infos about the kernel heap(s)",
2046 		"[(\"grow\" | \"stats\" | <heap>)]\n"
2047 		"Dump infos about the kernel heap(s). If \"grow\" is specified, only\n"
2048 		"infos about the dedicated grow heap are printed. If \"stats\" is\n"
2049 		"given as the argument, currently only the heap count is printed.\n"
2050 		"If <heap> is given, it is interpreted as the address of the heap to\n"
2051 		"print infos about.\n", 0);
2052 #if !KERNEL_HEAP_LEAK_CHECK
2053 	add_debugger_command_etc("allocations", &dump_allocations,
2054 		"Dump current heap allocations",
2055 		"[\"stats\"] [<heap>]\n"
2056 		"If no parameters are given, all current allocations are dumped.\n"
2057 		"If the optional argument \"stats\" is specified, only the allocation\n"
2058 		"counts and no individual allocations are printed.\n"
2059 		"If a specific heap address is given, only allocations of this\n"
2060 		"allocator are dumped.\n", 0);
2061 #else // !KERNEL_HEAP_LEAK_CHECK
2062 	add_debugger_command_etc("allocations", &dump_allocations,
2063 		"Dump current heap allocations",
2064 		"[(\"team\" | \"thread\") <id>] [\"caller\" <address>] [\"address\" <address>] [\"stats\"]\n"
2065 		"If no parameters are given, all current allocations are dumped.\n"
2066 		"If \"team\", \"thread\", \"caller\", and/or \"address\" is specified as the first\n"
2067 		"argument, only allocations matching the team ID, thread ID, caller\n"
2068 		"address or allocated address given in the second argument are printed.\n"
2069 		"If the optional argument \"stats\" is specified, only the allocation\n"
2070 		"counts and no individual allocations are printed.\n", 0);
2071 	add_debugger_command_etc("allocations_per_caller",
2072 		&dump_allocations_per_caller,
2073 		"Dump current heap allocations summed up per caller",
2074 		"[ \"-c\" ] [ -h <heap> ]\n"
2075 		"The current allocations will be summed up by caller (their count and\n"
2076 		"size) and printed in decreasing order by size or, if \"-c\" is\n"
2077 		"specified, by allocation count. If given, <heap> specifies the\n"
2078 		"address of the heap for which to print the allocations.\n", 0);
2079 #endif // KERNEL_HEAP_LEAK_CHECK
2080 	return B_OK;
2081 }
2082 
2083 
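// Called once areas can be created: sets up the dedicated grow heap that
// keeps the grower thread itself supplied with memory, as well as the small
// VIP heap backing HEAP_PRIORITY_VIP allocations.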
2084 status_t
2085 heap_init_post_area()
2086 {
2087 	void *address = NULL;
2088 	area_id growHeapArea = create_area("dedicated grow heap", &address,
2089 		B_ANY_KERNEL_BLOCK_ADDRESS, HEAP_DEDICATED_GROW_SIZE, B_FULL_LOCK,
2090 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2091 	if (growHeapArea < 0) {
2092 		panic("heap_init_post_area(): couldn't allocate dedicated grow heap "
2093 			"area");
2094 		return growHeapArea;
2095 	}
2096 
2097 	sGrowHeap = heap_create_allocator("grow", (addr_t)address,
2098 		HEAP_DEDICATED_GROW_SIZE, &sHeapClasses[0], false);
2099 	if (sGrowHeap == NULL) {
2100 		panic("heap_init_post_area(): failed to create dedicated grow heap\n");
2101 		return B_ERROR;
2102 	}
2103 
2104 	// create the VIP heap
2105 	static const heap_class heapClass = {
2106 		"VIP I/O",					/* name */
2107 		100,						/* initial percentage */
2108 		B_PAGE_SIZE / 8,			/* max allocation size */
2109 		B_PAGE_SIZE,				/* page size */
2110 		8,							/* min bin size */
2111 		4,							/* bin alignment */
2112 		8,							/* min count per page */
2113 		16							/* max waste per page */
2114 	};
2115 
2116 	area_id vipHeapArea = create_area("VIP heap", &address,
2117 		B_ANY_KERNEL_ADDRESS, VIP_HEAP_SIZE, B_FULL_LOCK,
2118 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2119 	if (vipHeapArea < 0) {
2120 		panic("heap_init_post_area(): couldn't allocate VIP heap area");
2121 		return B_ERROR;
2122 	}
2123 
2124 	sVIPHeap = heap_create_allocator("VIP heap", (addr_t)address,
2125 		VIP_HEAP_SIZE, &heapClass, false);
2126 	if (sVIPHeap == NULL) {
2127 		panic("heap_init_post_area(): failed to create VIP heap\n");
2128 		return B_ERROR;
2129 	}
2130 
2131 	dprintf("heap_init_post_area(): created VIP heap: %p\n", sVIPHeap);
2132 
2133 	return B_OK;
2134 }
2135 
2136 
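// Creates the semaphores used to request heap growth and to notify waiting
// threads once the grower is done.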
2137 status_t
2138 heap_init_post_sem()
2139 {
2140 	sHeapGrowSem = create_sem(0, "heap_grow_sem");
2141 	if (sHeapGrowSem < 0) {
2142 		panic("heap_init_post_sem(): failed to create heap grow sem\n");
2143 		return B_ERROR;
2144 	}
2145 
2146 	sHeapGrownNotify = create_sem(0, "heap_grown_notify");
2147 	if (sHeapGrownNotify < 0) {
2148 		panic("heap_init_post_sem(): failed to create heap grown notify sem\n");
2149 		return B_ERROR;
2150 	}
2151 
2152 	return B_OK;
2153 }
2154 
2155 
2156 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
2157 
2158 
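// Spawns the heap grower thread and, if there is enough memory, additional
// per-CPU heap sets; when the slab allocator serves malloc() it only
// registers debugger commands instead. In both configurations the deferred
// deleter is installed as a kernel daemon.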
2159 status_t
2160 heap_init_post_thread()
2161 {
2162 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
2163 	sHeapGrowThread = spawn_kernel_thread(heap_grow_thread, "heap grower",
2164 		B_URGENT_PRIORITY, NULL);
2165 	if (sHeapGrowThread < 0) {
2166 		panic("heap_init_post_thread(): cannot create heap grow thread\n");
2167 		return sHeapGrowThread;
2168 	}
2169 
2170 	// create per-cpu heaps if there's enough memory
2171 	int32 heapCount = MIN(smp_get_num_cpus(),
2172 		(int32)vm_page_num_pages() / 60 / 1024);
2173 	for (int32 i = 1; i < heapCount; i++) {
2174 		addr_t base = 0;
2175 		size_t size = HEAP_GROW_SIZE * HEAP_CLASS_COUNT;
2176 		area_id perCPUHeapArea = create_area("per cpu initial heap",
2177 			(void **)&base, B_ANY_KERNEL_ADDRESS, size, B_FULL_LOCK,
2178 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2179 		if (perCPUHeapArea < 0)
2180 			break;
2181 
2182 		for (uint32 j = 0; j < HEAP_CLASS_COUNT; j++) {
2183 			int32 heapIndex = i * HEAP_CLASS_COUNT + j;
2184 			size_t partSize = size * sHeapClasses[j].initial_percentage / 100;
2185 			sHeaps[heapIndex] = heap_create_allocator(sHeapClasses[j].name,
2186 				base, partSize, &sHeapClasses[j], false);
2187 			sLastGrowRequest[heapIndex] = 0;
2188 			sLastHandledGrowRequest[heapIndex] = 0;
2189 			base += partSize;
2190 			sHeapCount++;
2191 		}
2192 	}
2193 
2194 	resume_thread(sHeapGrowThread);
2195 
2196 #else	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
2197 
2198 	// set up some debug commands
2199 	add_debugger_command_etc("heap", &dump_heap_list,
2200 		"Dump infos about a specific heap",
2201 		"[\"stats\"] <heap>\n"
2202 		"Dump infos about the specified kernel heap. If \"stats\" is given\n"
2203 		"as the argument, currently only the heap count is printed.\n", 0);
2204 #if !KERNEL_HEAP_LEAK_CHECK
2205 	add_debugger_command_etc("heap_allocations", &dump_allocations,
2206 		"Dump current heap allocations",
2207 		"[\"stats\"] <heap>\n"
2208 		"If the optional argument \"stats\" is specified, only the allocation\n"
2209 		"counts and no individual allocations are printed.\n", 0);
2210 #endif	// KERNEL_HEAP_LEAK_CHECK
2211 #endif	// USE_SLAB_ALLOCATOR_FOR_MALLOC
2212 
2213 	// run the deferred deleter roughly once a second
2214 	if (register_kernel_daemon(deferred_deleter, NULL, 10) != B_OK)
2215 		panic("heap_init_post_thread(): failed to init deferred deleter");
2216 
2217 	return B_OK;
2218 }
2219 
2220 
2221 //	#pragma mark - Public API
2222 
2223 
2224 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
2225 
2226 
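// Allocations above HEAP_AREA_USE_THRESHOLD are served from their own area,
// prefixed with an area_allocation_info header so that free()/realloc() can
// identify them later. Everything else goes through the per-CPU heaps; if all
// of them are full, an urgent grow is requested and the allocation is retried
// once the grower has finished.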
2227 void *
2228 memalign(size_t alignment, size_t size)
2229 {
2230 	if (!gKernelStartup && !are_interrupts_enabled()) {
2231 		panic("memalign(): called with interrupts disabled\n");
2232 		return NULL;
2233 	}
2234 
2235 	if (!gKernelStartup && size > HEAP_AREA_USE_THRESHOLD) {
2236 		// don't even attempt such a huge allocation - use areas instead
2237 		size_t areaSize = ROUNDUP(size + sizeof(area_allocation_info)
2238 			+ alignment, B_PAGE_SIZE);
2239 		if (areaSize < size) {
2240 			// the size overflowed
2241 			return NULL;
2242 		}
2243 
2244 		void *address = NULL;
2245 		area_id allocationArea = create_area("memalign area", &address,
2246 			B_ANY_KERNEL_BLOCK_ADDRESS, areaSize, B_FULL_LOCK,
2247 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2248 		if (allocationArea < B_OK) {
2249 			dprintf("heap: failed to create area for huge allocation\n");
2250 			return NULL;
2251 		}
2252 
2253 		area_allocation_info *info = (area_allocation_info *)address;
2254 		info->magic = kAreaAllocationMagic;
2255 		info->area = allocationArea;
2256 		info->base = address;
2257 		info->size = areaSize;
2258 		info->allocation_size = size;
2259 		info->allocation_alignment = alignment;
2260 
2261 		address = (void *)((addr_t)address + sizeof(area_allocation_info));
2262 		if (alignment != 0) {
2263 			address = (void *)ROUNDUP((addr_t)address, alignment);
2264 			ASSERT((addr_t)address % alignment == 0);
2265 			ASSERT((addr_t)address + size - 1 < (addr_t)info + areaSize - 1);
2266 		}
2267 
2268 		TRACE(("heap: allocated area %ld for huge allocation of %lu bytes\n",
2269 			allocationArea, size));
2270 
2271 		info->allocation_base = address;
2272 
2273 #if PARANOID_KERNEL_MALLOC
2274 		memset(address, 0xcc, size);
2275 #endif
2276 		return address;
2277 	}
2278 
2279 	void *result = NULL;
2280 	bool shouldGrow = false;
2281 	int32 cpuCount = MIN(smp_get_num_cpus(),
2282 		(int32)sHeapCount / HEAP_CLASS_COUNT);
2283 	int32 cpuNumber = smp_get_current_cpu();
2284 	for (int32 i = 0; i < cpuCount; i++) {
2285 		uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount);
2286 		heap_allocator *heap = sHeaps[heapIndex];
2287 		result = heap_memalign(heap, alignment, size);
2288 		if (result != NULL) {
2289 			shouldGrow = heap_should_grow(heap);
2290 			break;
2291 		}
2292 
2293 #if PARANOID_HEAP_VALIDATION
2294 		heap_validate_heap(heap);
2295 #endif
2296 	}
2297 
2298 	if (result == NULL) {
2299 		// request an urgent grow and wait - we don't grow ourselves here so
2300 		// that all growing is serialized through the grow thread; otherwise
2301 		// multiple threads hitting this situation (likely because memory ran
2302 		// out) would all add areas
2303 		uint32 heapIndex = heap_index_for(size, smp_get_current_cpu());
2304 		sLastGrowRequest[heapIndex]++;
2305 		switch_sem(sHeapGrowSem, sHeapGrownNotify);
2306 
2307 		// and then try again
2308 		result = heap_memalign(sHeaps[heapIndex], alignment, size);
2309 	} else if (shouldGrow) {
2310 		// should grow sometime soon, notify the grower
2311 		release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
2312 	}
2313 
2314 	if (result == NULL)
2315 		panic("heap: kernel heap has run out of memory\n");
2316 	return result;
2317 }
2318 
2319 
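// HEAP_PRIORITY_VIP allocations come from the VIP heap; allocations flagged
// with HEAP_DONT_WAIT_FOR_MEMORY or HEAP_DONT_LOCK_KERNEL_SPACE use the
// non-growing path; everything else is a plain memalign().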
2320 void *
2321 memalign_etc(size_t alignment, size_t size, uint32 flags)
2322 {
2323 	if ((flags & HEAP_PRIORITY_VIP) != 0)
2324 		return heap_memalign(sVIPHeap, alignment, size);
2325 
2326 	if ((flags & (HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE))
2327 			!= 0) {
2328 		return memalign_nogrow(alignment, size);
2329 	}
2330 
2331 	return memalign(alignment, size);
2332 }
2333 
2334 
2335 void
2336 free_etc(void *address, uint32 flags)
2337 {
2338 	if ((flags & HEAP_PRIORITY_VIP) != 0)
2339 		heap_free(sVIPHeap, address);
2340 	else
2341 		free(address);
2342 }
2343 
2344 
2345 void *
2346 malloc(size_t size)
2347 {
2348 	return memalign(0, size);
2349 }
2350 
2351 
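// Tries the public heaps (starting with those of the current CPU), then the
// dedicated grow and VIP heaps, and finally checks whether the address is a
// huge allocation that is backed by its own area.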
2352 void
2353 free(void *address)
2354 {
2355 	if (!gKernelStartup && !are_interrupts_enabled()) {
2356 		panic("free(): called with interrupts disabled\n");
2357 		return;
2358 	}
2359 
2360 	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2361 	for (uint32 i = 0; i < sHeapCount; i++) {
2362 		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2363 		if (heap_free(heap, address) == B_OK) {
2364 #if PARANOID_HEAP_VALIDATION
2365 			heap_validate_heap(heap);
2366 #endif
2367 			return;
2368 		}
2369 	}
2370 
2371 	// maybe it was allocated from the dedicated grow heap
2372 	if (heap_free(sGrowHeap, address) == B_OK)
2373 		return;
2374 
2375 	// or maybe it was allocated from the VIP heap
2376 	if (heap_free(sVIPHeap, address) == B_OK)
2377 		return;
2378 
2379 	// or maybe it was a huge allocation using an area
2380 	area_info areaInfo;
2381 	area_id area = area_for(address);
2382 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2383 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2384 
2385 		// just make extra sure it was allocated by us
2386 		if (info->magic == kAreaAllocationMagic && info->area == area
2387 			&& info->size == areaInfo.size && info->base == areaInfo.address
2388 			&& info->allocation_size < areaInfo.size) {
2389 			delete_area(area);
2390 			TRACE(("free(): freed huge allocation by deleting area %ld\n",
2391 				area));
2392 			return;
2393 		}
2394 	}
2395 
2396 	panic("free(): free failed for address %p\n", address);
2397 }
2398 
2399 
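// Resizing is first attempted in place by the heap that owns the address (or,
// for huge allocations, within the backing area); otherwise a new block is
// allocated, the data copied over and the old allocation freed.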
2400 void *
2401 realloc(void *address, size_t newSize)
2402 {
2403 	if (!gKernelStartup && !are_interrupts_enabled()) {
2404 		panic("realloc(): called with interrupts disabled\n");
2405 		return NULL;
2406 	}
2407 
2408 	if (address == NULL)
2409 		return memalign(0, newSize);
2410 
2411 	if (newSize == 0) {
2412 		free(address);
2413 		return NULL;
2414 	}
2415 
2416 	void *newAddress = NULL;
2417 	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2418 	for (uint32 i = 0; i < sHeapCount; i++) {
2419 		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2420 		if (heap_realloc(heap, address, &newAddress, newSize) == B_OK) {
2421 #if PARANOID_HEAP_VALIDATION
2422 			heap_validate_heap(heap);
2423 #endif
2424 			return newAddress;
2425 		}
2426 	}
2427 
2428 	// maybe it was allocated from the dedicated grow heap
2429 	if (heap_realloc(sGrowHeap, address, &newAddress, newSize) == B_OK)
2430 		return newAddress;
2431 
2432 	// or maybe it was a huge allocation using an area
2433 	area_info areaInfo;
2434 	area_id area = area_for(address);
2435 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2436 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2437 
2438 		// just make extra sure it was allocated by us
2439 		if (info->magic == kAreaAllocationMagic && info->area == area
2440 			&& info->size == areaInfo.size && info->base == areaInfo.address
2441 			&& info->allocation_size < areaInfo.size) {
2442 			size_t available = info->size - ((addr_t)info->allocation_base
2443 				- (addr_t)info->base);
2444 
2445 			if (available >= newSize) {
2446 				// there is enough room available for the newSize
2447 				TRACE(("realloc(): new size %ld fits in old area %ld with %ld "
2448 					"available\n", newSize, area, available));
2449 				info->allocation_size = newSize;
2450 				return address;
2451 			}
2452 
2453 			// have to allocate/copy/free - TODO maybe resize the area instead?
2454 			newAddress = memalign(0, newSize);
2455 			if (newAddress == NULL) {
2456 				dprintf("realloc(): failed to allocate new block of %ld bytes\n",
2457 					newSize);
2458 				return NULL;
2459 			}
2460 
2461 			memcpy(newAddress, address, min_c(newSize, info->allocation_size));
2462 			delete_area(area);
2463 			TRACE(("realloc(): allocated new block %p for size %ld and deleted "
2464 				"old area %ld\n", newAddress, newSize, area));
2465 			return newAddress;
2466 		}
2467 	}
2468 
2469 	panic("realloc(): failed to realloc address %p to size %lu\n", address,
2470 		newSize);
2471 	return NULL;
2472 }
2473 
2474 
2475 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
2476 
2477 
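// calloc() is available independently of the allocator backing malloc(); it
// allocates via memalign() and zero-fills the result.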
2478 void *
2479 calloc(size_t numElements, size_t size)
2480 {
	// protect against overflow of numElements * size
	if (size != 0 && numElements > ((size_t)-1) / size)
		return NULL;

2481 	void *address = memalign(0, numElements * size);
2482 	if (address != NULL)
2483 		memset(address, 0, numElements * size);
2484 
2485 	return address;
2486 }
2487 
2488 
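// Queues a previously allocated block for freeing by the kernel daemon. A
// DeferredFreeListEntry is constructed in place within the block, so the
// block must be large enough to hold one; presumably this exists for contexts
// in which calling free() directly is not allowed.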
2489 void
2490 deferred_free(void *block)
2491 {
2492 	if (block == NULL)
2493 		return;
2494 
2495 	DeferredFreeListEntry *entry = new(block) DeferredFreeListEntry;
2496 
2497 	InterruptsSpinLocker _(sDeferredFreeListLock);
2498 	sDeferredFreeList.Add(entry);
2499 }
2500 
2501 
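// Simple reference counted allocations: malloc_referenced() prepends a 32 bit
// reference count initialized to 1, malloc_referenced_acquire() increments it
// and malloc_referenced_release() frees the block once the last reference has
// been released.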
2502 void *
2503 malloc_referenced(size_t size)
2504 {
2505 	int32 *referencedData = (int32 *)malloc(size + 4);
2506 	if (referencedData == NULL)
2507 		return NULL;
2508 
2509 	*referencedData = 1;
2510 	return referencedData + 1;
2511 }
2512 
2513 
2514 void *
2515 malloc_referenced_acquire(void *data)
2516 {
2517 	if (data != NULL) {
2518 		int32 *referencedData = (int32 *)data - 1;
2519 		atomic_add(referencedData, 1);
2520 	}
2521 
2522 	return data;
2523 }
2524 
2525 
2526 void
2527 malloc_referenced_release(void *data)
2528 {
2529 	if (data == NULL)
2530 		return;
2531 
2532 	int32 *referencedData = (int32 *)data - 1;
	// atomic_add() returns the previous value, so the block must be freed when
	// the last reference (previous count of 1) is released
2533 	if (atomic_add(referencedData, -1) == 1)
2534 		free(referencedData);
2535 }
2536 
2537 
2538 DeferredDeletable::~DeferredDeletable()
2539 {
2540 }
2541 
2542 
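// Queues a DeferredDeletable object for deletion by the kernel daemon instead
// of deleting it immediately.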
2543 void
2544 deferred_delete(DeferredDeletable *deletable)
2545 {
2546 	if (deletable == NULL)
2547 		return;
2548 
2549 	InterruptsSpinLocker _(sDeferredFreeListLock);
2550 	sDeferredDeletableList.Add(deletable);
2551 }
2552