xref: /haiku/src/system/kernel/heap.cpp (revision 3be9edf8da228afd9fec0390f408c964766122aa)
1 /*
2  * Copyright 2008-2009, Michael Lotz, mmlr@mlotz.ch.
3  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <arch/debug.h>
12 #include <debug.h>
13 #include <elf.h>
14 #include <heap.h>
15 #include <int.h>
16 #include <kernel.h>
17 #include <lock.h>
18 #include <signal.h>
19 #include <string.h>
20 #include <team.h>
21 #include <thread.h>
22 #include <tracing.h>
23 #include <util/AutoLock.h>
24 #include <vm.h>
25 #include <vm_page.h>
26 
27 
28 //#define TRACE_HEAP
29 #ifdef TRACE_HEAP
30 #	define TRACE(x) dprintf x
31 #else
32 #	define TRACE(x) ;
33 #endif
34 
35 
36 #if KERNEL_HEAP_LEAK_CHECK
37 typedef struct heap_leak_check_info_s {
38 	addr_t		caller;
39 	size_t		size;
40 	thread_id	thread;
41 	team_id		team;
42 } heap_leak_check_info;
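// When KERNEL_HEAP_LEAK_CHECK is enabled, one of these records is stored at
// the very end of each allocation (the requested size is enlarged by
// sizeof(heap_leak_check_info) in heap_memalign()), so the debugger commands
// below can attribute every allocation to its caller, thread and team.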
43 
44 struct caller_info {
45 	addr_t		caller;
46 	uint32		count;
47 	uint32		size;
48 };
49 
50 static const int32 kCallerInfoTableSize = 1024;
51 static caller_info sCallerInfoTable[kCallerInfoTableSize];
52 static int32 sCallerInfoCount = 0;
53 #endif
54 
55 
56 typedef struct heap_page_s heap_page;
57 
58 
59 typedef struct heap_area_s {
60 	area_id			area;
61 
62 	addr_t			base;
63 	size_t			size;
64 
65 	uint32			page_count;
66 	uint32			free_page_count;
67 
68 	heap_page *		free_pages;
69 	heap_page *		page_table;
70 
71 	heap_area_s *	prev;
72 	heap_area_s *	next;
73 	heap_area_s *	all_next;
74 } heap_area;
75 
76 
77 #define MAX_BIN_COUNT	31	// depends on the size of the bin_index field
78 
79 typedef struct heap_page_s {
80 	heap_area *		area;
81 	uint16			index;
82 	uint16			bin_index : 5;
83 	uint16			free_count : 10;
84 	uint16			in_use : 1;
85 	heap_page_s *	next;
86 	heap_page_s *	prev;
87 	union {
88 		uint16			empty_index;
89 		uint16			allocation_id; // used for bin == bin_count allocations
90 	};
91 	addr_t *		free_list;
92 } heap_page;
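// Note on the bit fields above: bin_index has 5 bits and can hold values up
// to 31, which is where MAX_BIN_COUNT comes from. A page that belongs to a
// multi-page ("large") allocation is marked with bin_index == bin_count and
// all pages of that allocation share the same allocation_id (the index of
// the first page, see heap_allocate_contiguous_pages()). For pages that
// belong to a bin, empty_index is the next never-yet-used slot, while
// free_list chains slots that have been freed again.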
93 
94 
95 typedef struct heap_bin_s {
96 	mutex		lock;
97 	uint32		element_size;
98 	uint16		max_free_count;
99 	heap_page *	page_list; // sorted so that the desired page is always first
100 } heap_bin;
101 
102 
103 struct heap_allocator_s {
104 	rw_lock		area_lock;
105 	mutex		page_lock;
106 
107 	const char *name;
108 	uint32		bin_count;
109 	uint32		page_size;
110 
111 	uint32		total_pages;
112 	uint32		total_free_pages;
113 	uint32		empty_areas;
114 
115 #if KERNEL_HEAP_LEAK_CHECK
116 	addr_t		(*get_caller)();
117 #endif
118 
119 	heap_bin *	bins;
120 	heap_area *	areas; // sorted so that the desired area is always first
121 	heap_area *	all_areas; // all areas including full ones
122 };
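// A heap_allocator manages one heap. area_lock (read/write) guards the area
// lists, every bin has its own mutex guarding its page_list, and page_lock
// guards the per-area free page lists and the page table state. Where
// several of these locks are held at once, the nesting is area_lock, then a
// bin lock, then page_lock (see heap_validate_heap() and heap_free()).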
123 
124 
125 static const uint32 kAreaAllocationMagic = 'AAMG';
126 typedef struct area_allocation_info_s {
127 	area_id		area;
128 	void *		base;
129 	uint32		magic;
130 	size_t		size;
131 	size_t		allocation_size;
132 	size_t		allocation_alignment;
133 	void *		allocation_base;
134 } area_allocation_info;
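// Presumably filled in by the allocation paths further down in this file:
// very large requests get an area of their own, with this header stored at
// the start of that area so that the free/realloc paths can recognize such
// allocations via kAreaAllocationMagic and return them directly to the VM.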
135 
136 
137 struct DeferredFreeListEntry : SinglyLinkedListLinkImpl<DeferredFreeListEntry> {
138 };
139 
140 
141 typedef SinglyLinkedList<DeferredFreeListEntry> DeferredFreeList;
142 typedef SinglyLinkedList<DeferredDeletable> DeferredDeletableList;
143 
144 
145 // Heap class configuration
146 #define HEAP_CLASS_COUNT 3
147 static heap_class sHeapClasses[HEAP_CLASS_COUNT] = {
148 	{
149 		"small",					/* name */
150 		50,							/* initial percentage */
151 		B_PAGE_SIZE / 8,			/* max allocation size */
152 		B_PAGE_SIZE,				/* page size */
153 		8,							/* min bin size */
154 		4,							/* bin alignment */
155 		8,							/* min count per page */
156 		16							/* max waste per page */
157 	},
158 	{
159 		"medium",					/* name */
160 		30,							/* initial percentage */
161 		B_PAGE_SIZE * 2,			/* max allocation size */
162 		B_PAGE_SIZE * 8,			/* page size */
163 		B_PAGE_SIZE / 8,			/* min bin size */
164 		32,							/* bin alignment */
165 		4,							/* min count per page */
166 		64							/* max waste per page */
167 	},
168 	{
169 		"large",					/* name */
170 		20,							/* initial percentage */
171 		HEAP_AREA_USE_THRESHOLD,	/* max allocation size */
172 		B_PAGE_SIZE * 16,			/* page size */
173 		B_PAGE_SIZE * 2,			/* min bin size */
174 		128,						/* bin alignment */
175 		1,							/* min count per page */
176 		256							/* max waste per page */
177 	}
178 };
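// Illustration (assuming a 4 KB B_PAGE_SIZE): a 200 byte request is served
// from the "small" class (at most 512 bytes per allocation), a 3 KB request
// from "medium" (at most 8 KB), and anything larger, up to
// HEAP_AREA_USE_THRESHOLD, from "large"; see heap_index_for() below. The
// initial percentages simply split the initial heap space 50/30/20 between
// the classes in heap_init().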
179 
180 
181 static uint32 sHeapCount;
182 static heap_allocator *sHeaps[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
183 static uint32 *sLastGrowRequest[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
184 static uint32 *sLastHandledGrowRequest[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
185 
186 static heap_allocator *sGrowHeap = NULL;
187 static thread_id sHeapGrowThread = -1;
188 static sem_id sHeapGrowSem = -1;
189 static sem_id sHeapGrownNotify = -1;
190 static bool sAddGrowHeap = false;
191 static DeferredFreeList sDeferredFreeList;
192 static DeferredDeletableList sDeferredDeletableList;
193 static spinlock sDeferredFreeListLock;
194 
195 
196 
197 // #pragma mark - Tracing
198 
199 #if KERNEL_HEAP_TRACING
200 namespace KernelHeapTracing {
201 
202 class Allocate : public AbstractTraceEntry {
203 	public:
204 		Allocate(addr_t address, size_t size)
205 			:	fAddress(address),
206 				fSize(size)
207 		{
208 			Initialized();
209 		}
210 
211 		virtual void AddDump(TraceOutput &out)
212 		{
213 			out.Print("heap allocate: 0x%08lx (%lu bytes)", fAddress, fSize);
214 		}
215 
216 	private:
217 		addr_t	fAddress;
218 		size_t	fSize;
219 };
220 
221 
222 class Reallocate : public AbstractTraceEntry {
223 	public:
224 		Reallocate(addr_t oldAddress, addr_t newAddress, size_t newSize)
225 			:	fOldAddress(oldAddress),
226 				fNewAddress(newAddress),
227 				fNewSize(newSize)
228 		{
229 			Initialized();
230 		}
231 
232 		virtual void AddDump(TraceOutput &out)
233 		{
234 			out.Print("heap reallocate: 0x%08lx -> 0x%08lx (%lu bytes)",
235 				fOldAddress, fNewAddress, fNewSize);
236 		}
237 
238 	private:
239 		addr_t	fOldAddress;
240 		addr_t	fNewAddress;
241 		size_t	fNewSize;
242 };
243 
244 
245 class Free : public AbstractTraceEntry {
246 	public:
247 		Free(addr_t address)
248 			:	fAddress(address)
249 		{
250 			Initialized();
251 		}
252 
253 		virtual void AddDump(TraceOutput &out)
254 		{
255 			out.Print("heap free: 0x%08lx", fAddress);
256 		}
257 
258 	private:
259 		addr_t	fAddress;
260 };
261 
262 
263 } // namespace KernelHeapTracing
264 
265 #	define T(x)	if (!gKernelStartup) new(std::nothrow) KernelHeapTracing::x;
266 #else
267 #	define T(x)	;
268 #endif
269 
270 
271 // #pragma mark - Debug functions
272 
273 
274 #if KERNEL_HEAP_LEAK_CHECK
275 static addr_t
276 get_caller()
277 {
278 	// Find the first return address outside of the allocator code. Note that
279 	// this makes certain assumptions about how the code for the functions
280 	// ends up in the kernel object.
281 	addr_t returnAddresses[5];
282 	int32 depth = arch_debug_get_stack_trace(returnAddresses, 5, 0, 1, false);
283 	for (int32 i = 0; i < depth; i++) {
284 		if (returnAddresses[i] < (addr_t)&get_caller
285 			|| returnAddresses[i] > (addr_t)&malloc_referenced_release) {
286 			return returnAddresses[i];
287 		}
288 	}
289 
290 	return 0;
291 }
292 #endif
293 
294 
295 static void
296 dump_page(heap_page *page)
297 {
298 	uint32 count = 0;
299 	for (addr_t *temp = page->free_list; temp != NULL; temp = (addr_t *)*temp)
300 		count++;
301 
302 	kprintf("\t\tpage %p: bin_index: %u; free_count: %u; empty_index: %u; "
303 		"free_list %p (%lu entr%s)\n", page, page->bin_index, page->free_count,
304 		page->empty_index, page->free_list, count, count == 1 ? "y" : "ies");
305 }
306 
307 
308 static void
309 dump_bin(heap_bin *bin)
310 {
311 	uint32 count = 0;
312 	for (heap_page *page = bin->page_list; page != NULL; page = page->next)
313 		count++;
314 
315 	kprintf("\telement_size: %lu; max_free_count: %u; page_list %p (%lu pages"
316 		");\n", bin->element_size, bin->max_free_count, bin->page_list, count);
317 
318 	for (heap_page *page = bin->page_list; page != NULL; page = page->next)
319 		dump_page(page);
320 }
321 
322 
323 static void
324 dump_bin_list(heap_allocator *heap)
325 {
326 	for (uint32 i = 0; i < heap->bin_count; i++)
327 		dump_bin(&heap->bins[i]);
328 	kprintf("\n");
329 }
330 
331 
332 static void
333 dump_allocator_areas(heap_allocator *heap)
334 {
335 	heap_area *area = heap->all_areas;
336 	while (area) {
337 		kprintf("\tarea %p: area: %ld; base: 0x%08lx; size: %lu; page_count: "
338 			"%lu; free_pages: %p (%lu entr%s)\n", area, area->area, area->base,
339 			area->size, area->page_count, area->free_pages,
340 			area->free_page_count, area->free_page_count == 1 ? "y" : "ies");
341 		area = area->all_next;
342 	}
343 
344 	kprintf("\n");
345 }
346 
347 
348 static void
349 dump_allocator(heap_allocator *heap, bool areas, bool bins)
350 {
351 	kprintf("allocator %p: name: %s; page_size: %lu; bin_count: %lu; pages: "
352 		"%lu; free_pages: %lu; empty_areas: %lu\n", heap, heap->name,
353 		heap->page_size, heap->bin_count, heap->total_pages,
354 		heap->total_free_pages, heap->empty_areas);
355 
356 	if (areas)
357 		dump_allocator_areas(heap);
358 	if (bins)
359 		dump_bin_list(heap);
360 }
361 
362 
363 static int
364 dump_heap_list(int argc, char **argv)
365 {
366 	if (argc == 2 && strcmp(argv[1], "grow") == 0) {
367 		// only dump dedicated grow heap info
368 		kprintf("dedicated grow heap:\n");
369 		dump_allocator(sGrowHeap, true, true);
370 		return 0;
371 	}
372 
373 	bool stats = false;
374 	int i = 1;
375 
376 	if (argc > 1 && strcmp(argv[1], "stats") == 0) {
377 		stats = true;
378 		i++;
379 	}
380 
381 	uint64 heapAddress = 0;
382 	if (i < argc && !evaluate_debug_expression(argv[i], &heapAddress, true)) {
383 		print_debugger_command_usage(argv[0]);
384 		return 0;
385 	}
386 
387 	if (heapAddress == 0) {
388 		// dump default kernel heaps
389 		for (uint32 i = 0; i < sHeapCount; i++)
390 			dump_allocator(sHeaps[i], !stats, !stats);
391 	} else {
392 		// dump specified heap
393 		dump_allocator((heap_allocator*)(addr_t)heapAddress, !stats, !stats);
394 	}
395 
396 	return 0;
397 }
398 
399 
400 #if !KERNEL_HEAP_LEAK_CHECK
401 
402 static int
403 dump_allocations(int argc, char **argv)
404 {
405 	uint64 heapAddress = 0;
406 	bool statsOnly = false;
407 	for (int32 i = 1; i < argc; i++) {
408 		if (strcmp(argv[i], "stats") == 0)
409 			statsOnly = true;
410 		else if (!evaluate_debug_expression(argv[i], &heapAddress, true)) {
411 			print_debugger_command_usage(argv[0]);
412 			return 0;
413 		}
414 	}
415 
416 	size_t totalSize = 0;
417 	uint32 totalCount = 0;
418 	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
419 		heap_allocator *heap = sHeaps[heapIndex];
420 		if (heapAddress != 0)
421 			heap = (heap_allocator *)(addr_t)heapAddress;
422 
423 		// go through all the pages in all the areas
424 		heap_area *area = heap->all_areas;
425 		while (area) {
426 			for (uint32 i = 0; i < area->page_count; i++) {
427 				heap_page *page = &area->page_table[i];
428 				if (!page->in_use)
429 					continue;
430 
431 				addr_t base = area->base + i * heap->page_size;
432 				if (page->bin_index < heap->bin_count) {
433 					// page is used by a small allocation bin
434 					uint32 elementCount = page->empty_index;
435 					size_t elementSize
436 						= heap->bins[page->bin_index].element_size;
437 					for (uint32 j = 0; j < elementCount;
438 							j++, base += elementSize) {
439 						// walk the free list to see if this element is in use
440 						bool elementInUse = true;
441 						for (addr_t *temp = page->free_list; temp != NULL;
442 								temp = (addr_t *)*temp) {
443 							if ((addr_t)temp == base) {
444 								elementInUse = false;
445 								break;
446 							}
447 						}
448 
449 						if (!elementInUse)
450 							continue;
451 
452 						if (!statsOnly) {
453 							kprintf("address: 0x%08lx; size: %lu bytes\n",
454 								base, elementSize);
455 						}
456 
457 						totalSize += elementSize;
458 						totalCount++;
459 					}
460 				} else {
461 					// page is used by a big allocation, find the page count
462 					uint32 pageCount = 1;
463 					while (i + pageCount < area->page_count
464 						&& area->page_table[i + pageCount].in_use
465 						&& area->page_table[i + pageCount].bin_index
466 							== heap->bin_count
467 						&& area->page_table[i + pageCount].allocation_id
468 							== page->allocation_id)
469 						pageCount++;
470 
471 					size_t size = pageCount * heap->page_size;
472 
473 					if (!statsOnly) {
474 						kprintf("address: 0x%08lx; size: %lu bytes\n", base,
475 							size);
476 					}
477 
478 					totalSize += size;
479 					totalCount++;
480 
481 					// skip the allocated pages
482 					i += pageCount - 1;
483 				}
484 			}
485 
486 			area = area->all_next;
487 		}
488 
489 		if (heapAddress != 0)
490 			break;
491 	}
492 
493 	kprintf("total allocations: %lu; total bytes: %lu\n", totalCount, totalSize);
494 	return 0;
495 }
496 
497 #else // !KERNEL_HEAP_LEAK_CHECK
498 
499 static int
500 dump_allocations(int argc, char **argv)
501 {
502 	team_id team = -1;
503 	thread_id thread = -1;
504 	addr_t caller = 0;
505 	addr_t address = 0;
506 	bool statsOnly = false;
507 
508 	for (int32 i = 1; i < argc; i++) {
509 		if (strcmp(argv[i], "team") == 0)
510 			team = parse_expression(argv[++i]);
511 		else if (strcmp(argv[i], "thread") == 0)
512 			thread = parse_expression(argv[++i]);
513 		else if (strcmp(argv[i], "caller") == 0)
514 			caller = parse_expression(argv[++i]);
515 		else if (strcmp(argv[i], "address") == 0)
516 			address = parse_expression(argv[++i]);
517 		else if (strcmp(argv[i], "stats") == 0)
518 			statsOnly = true;
519 		else {
520 			print_debugger_command_usage(argv[0]);
521 			return 0;
522 		}
523 	}
524 
525 	size_t totalSize = 0;
526 	uint32 totalCount = 0;
527 	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
528 		heap_allocator *heap = sHeaps[heapIndex];
529 
530 		// go through all the pages in all the areas
531 		heap_area *area = heap->all_areas;
532 		while (area) {
533 			heap_leak_check_info *info = NULL;
534 			for (uint32 i = 0; i < area->page_count; i++) {
535 				heap_page *page = &area->page_table[i];
536 				if (!page->in_use)
537 					continue;
538 
539 				addr_t base = area->base + i * heap->page_size;
540 				if (page->bin_index < heap->bin_count) {
541 					// page is used by a small allocation bin
542 					uint32 elementCount = page->empty_index;
543 					size_t elementSize
544 						= heap->bins[page->bin_index].element_size;
545 					for (uint32 j = 0; j < elementCount;
546 							j++, base += elementSize) {
547 						// walk the free list to see if this element is in use
548 						bool elementInUse = true;
549 						for (addr_t *temp = page->free_list; temp != NULL;
550 								temp = (addr_t *)*temp) {
551 							if ((addr_t)temp == base) {
552 								elementInUse = false;
553 								break;
554 							}
555 						}
556 
557 						if (!elementInUse)
558 							continue;
559 
560 						info = (heap_leak_check_info *)(base + elementSize
561 							- sizeof(heap_leak_check_info));
562 
563 						if ((team == -1 || info->team == team)
564 							&& (thread == -1 || info->thread == thread)
565 							&& (caller == 0 || info->caller == caller)
566 							&& (address == 0 || base == address)) {
567 							// interesting...
568 							if (!statsOnly) {
569 								kprintf("team: % 6ld; thread: % 6ld; "
570 									"address: 0x%08lx; size: %lu bytes; "
571 									"caller: %#lx\n", info->team, info->thread,
572 									base, info->size, info->caller);
573 							}
574 
575 							totalSize += info->size;
576 							totalCount++;
577 						}
578 					}
579 				} else {
580 					// page is used by a big allocation, find the page count
581 					uint32 pageCount = 1;
582 					while (i + pageCount < area->page_count
583 						&& area->page_table[i + pageCount].in_use
584 						&& area->page_table[i + pageCount].bin_index
585 							== heap->bin_count
586 						&& area->page_table[i + pageCount].allocation_id
587 							== page->allocation_id)
588 						pageCount++;
589 
590 					info = (heap_leak_check_info *)(base + pageCount
591 						* heap->page_size - sizeof(heap_leak_check_info));
592 
593 					if ((team == -1 || info->team == team)
594 						&& (thread == -1 || info->thread == thread)
595 						&& (caller == 0 || info->caller == caller)
596 						&& (address == 0 || base == address)) {
597 						// interesting...
598 						if (!statsOnly) {
599 							kprintf("team: % 6ld; thread: % 6ld;"
600 								" address: 0x%08lx; size: %lu bytes;"
601 								" caller: %#lx\n", info->team, info->thread,
602 								base, info->size, info->caller);
603 						}
604 
605 						totalSize += info->size;
606 						totalCount++;
607 					}
608 
609 					// skip the allocated pages
610 					i += pageCount - 1;
611 				}
612 			}
613 
614 			area = area->all_next;
615 		}
616 	}
617 
618 	kprintf("total allocations: %lu; total bytes: %lu\n", totalCount,
619 		totalSize);
620 	return 0;
621 }
622 
623 
624 static caller_info*
625 get_caller_info(addr_t caller)
626 {
627 	// find the caller info
628 	for (int32 i = 0; i < sCallerInfoCount; i++) {
629 		if (caller == sCallerInfoTable[i].caller)
630 			return &sCallerInfoTable[i];
631 	}
632 
633 	// not found, add a new entry, if there are free slots
634 	if (sCallerInfoCount >= kCallerInfoTableSize)
635 		return NULL;
636 
637 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
638 	info->caller = caller;
639 	info->count = 0;
640 	info->size = 0;
641 
642 	return info;
643 }
644 
645 
646 static int
647 caller_info_compare_size(const void* _a, const void* _b)
648 {
649 	const caller_info* a = (const caller_info*)_a;
650 	const caller_info* b = (const caller_info*)_b;
651 	return (int)(b->size - a->size);
652 }
653 
654 
655 static int
656 caller_info_compare_count(const void* _a, const void* _b)
657 {
658 	const caller_info* a = (const caller_info*)_a;
659 	const caller_info* b = (const caller_info*)_b;
660 	return (int)(b->count - a->count);
661 }
662 
663 
664 static bool
665 analyze_allocation_callers(heap_allocator *heap)
666 {
667 	// go through all the pages in all the areas
668 	heap_area *area = heap->all_areas;
669 	while (area) {
670 		heap_leak_check_info *info = NULL;
671 		for (uint32 i = 0; i < area->page_count; i++) {
672 			heap_page *page = &area->page_table[i];
673 			if (!page->in_use)
674 				continue;
675 
676 			addr_t base = area->base + i * heap->page_size;
677 			if (page->bin_index < heap->bin_count) {
678 				// page is used by a small allocation bin
679 				uint32 elementCount = page->empty_index;
680 				size_t elementSize = heap->bins[page->bin_index].element_size;
681 				for (uint32 j = 0; j < elementCount; j++, base += elementSize) {
682 					// walk the free list to see if this element is in use
683 					bool elementInUse = true;
684 					for (addr_t *temp = page->free_list; temp != NULL;
685 						temp = (addr_t *)*temp) {
686 						if ((addr_t)temp == base) {
687 							elementInUse = false;
688 							break;
689 						}
690 					}
691 
692 					if (!elementInUse)
693 						continue;
694 
695 					info = (heap_leak_check_info *)(base + elementSize
696 						- sizeof(heap_leak_check_info));
697 
698 					caller_info *callerInfo = get_caller_info(info->caller);
699 					if (callerInfo == NULL) {
700 						kprintf("out of space for caller infos\n");
701 						return 0;
702 					}
703 
704 					callerInfo->count++;
705 					callerInfo->size += info->size;
706 				}
707 			} else {
708 				// page is used by a big allocation, find the page count
709 				uint32 pageCount = 1;
710 				while (i + pageCount < area->page_count
711 					&& area->page_table[i + pageCount].in_use
712 					&& area->page_table[i + pageCount].bin_index
713 						== heap->bin_count
714 					&& area->page_table[i + pageCount].allocation_id
715 						== page->allocation_id) {
716 					pageCount++;
717 				}
718 
719 				info = (heap_leak_check_info *)(base + pageCount
720 					* heap->page_size - sizeof(heap_leak_check_info));
721 
722 				caller_info *callerInfo = get_caller_info(info->caller);
723 				if (callerInfo == NULL) {
724 					kprintf("out of space for caller infos\n");
725 					return false;
726 				}
727 
728 				callerInfo->count++;
729 				callerInfo->size += info->size;
730 
731 				// skip the allocated pages
732 				i += pageCount - 1;
733 			}
734 		}
735 
736 		area = area->all_next;
737 	}
738 
739 	return true;
740 }
741 
742 
743 static int
744 dump_allocations_per_caller(int argc, char **argv)
745 {
746 	bool sortBySize = true;
747 	heap_allocator *heap = NULL;
748 
749 	for (int32 i = 1; i < argc; i++) {
750 		if (strcmp(argv[i], "-c") == 0) {
751 			sortBySize = false;
752 		} else if (strcmp(argv[i], "-h") == 0) {
753 			uint64 heapAddress;
754 			if (++i >= argc
755 				|| !evaluate_debug_expression(argv[i], &heapAddress, true)) {
756 				print_debugger_command_usage(argv[0]);
757 				return 0;
758 			}
759 
760 			heap = (heap_allocator*)(addr_t)heapAddress;
761 		} else {
762 			print_debugger_command_usage(argv[0]);
763 			return 0;
764 		}
765 	}
766 
767 	sCallerInfoCount = 0;
768 
769 	if (heap != NULL) {
770 		if (!analyze_allocation_callers(heap))
771 			return 0;
772 	} else {
773 		for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
774 			if (!analyze_allocation_callers(sHeaps[heapIndex]))
775 				return 0;
776 		}
777 	}
778 
779 	// sort the array
780 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
781 		sortBySize ? &caller_info_compare_size : &caller_info_compare_count);
782 
783 	kprintf("%ld different callers, sorted by %s...\n\n", sCallerInfoCount,
784 		sortBySize ? "size" : "count");
785 
786 	kprintf("     count        size      caller\n");
787 	kprintf("----------------------------------\n");
788 	for (int32 i = 0; i < sCallerInfoCount; i++) {
789 		caller_info& info = sCallerInfoTable[i];
790 		kprintf("%10ld  %10ld  %#08lx", info.count, info.size, info.caller);
791 
792 		const char *symbol;
793 		const char *imageName;
794 		bool exactMatch;
795 		addr_t baseAddress;
796 
797 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
798 				&imageName, &exactMatch) == B_OK) {
799 			kprintf("  %s + 0x%lx (%s)%s\n", symbol,
800 				info.caller - baseAddress, imageName,
801 				exactMatch ? "" : " (nearest)");
802 		} else
803 			kprintf("\n");
804 	}
805 
806 	return 0;
807 }
808 
809 #endif // KERNEL_HEAP_LEAK_CHECK
810 
811 
812 #if PARANOID_HEAP_VALIDATION
813 static void
814 heap_validate_heap(heap_allocator *heap)
815 {
816 	ReadLocker areaReadLocker(heap->area_lock);
817 	for (uint32 i = 0; i < heap->bin_count; i++)
818 		mutex_lock(&heap->bins[i].lock);
819 	MutexLocker pageLocker(heap->page_lock);
820 
821 	uint32 totalPageCount = 0;
822 	uint32 totalFreePageCount = 0;
823 	heap_area *area = heap->all_areas;
824 	while (area != NULL) {
825 		// validate the free pages list
826 		uint32 freePageCount = 0;
827 		heap_page *lastPage = NULL;
828 		heap_page *page = area->free_pages;
829 		while (page) {
830 			if ((addr_t)page < (addr_t)&area->page_table[0]
831 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
832 				panic("free page is not part of the page table\n");
833 
834 			if (page->index >= area->page_count)
835 				panic("free page has invalid index\n");
836 
837 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
838 				panic("free page index does not lead to target page\n");
839 
840 			if (page->prev != lastPage)
841 				panic("free page entry has invalid prev link\n");
842 
843 			if (page->in_use)
844 				panic("free page marked as in use\n");
845 
846 			lastPage = page;
847 			page = page->next;
848 			freePageCount++;
849 		}
850 
851 		totalPageCount += freePageCount;
852 		totalFreePageCount += freePageCount;
853 		if (area->free_page_count != freePageCount)
854 			panic("free page count doesn't match free page list\n");
855 
856 		// validate the page table
857 		uint32 usedPageCount = 0;
858 		for (uint32 i = 0; i < area->page_count; i++) {
859 			if (area->page_table[i].in_use)
860 				usedPageCount++;
861 		}
862 
863 		totalPageCount += usedPageCount;
864 		if (freePageCount + usedPageCount != area->page_count) {
865 			panic("free pages and used pages do not add up (%lu + %lu != %lu)\n",
866 				freePageCount, usedPageCount, area->page_count);
867 		}
868 
869 		area = area->all_next;
870 	}
871 
872 	// validate the areas
873 	area = heap->areas;
874 	heap_area *lastArea = NULL;
875 	uint32 lastFreeCount = 0;
876 	while (area != NULL) {
877 		if (area->free_page_count < lastFreeCount)
878 			panic("size ordering of area list broken\n");
879 
880 		if (area->prev != lastArea)
881 			panic("area list entry has invalid prev link\n");
882 
883 		lastArea = area;
884 		lastFreeCount = area->free_page_count;
885 		area = area->next;
886 	}
887 
888 	lastArea = NULL;
889 	area = heap->all_areas;
890 	while (area != NULL) {
891 		if (lastArea != NULL && lastArea->base < area->base)
892 			panic("base ordering of all_areas list broken\n");
893 
894 		lastArea = area;
895 		area = area->all_next;
896 	}
897 
898 	// validate the bins
899 	for (uint32 i = 0; i < heap->bin_count; i++) {
900 		heap_bin *bin = &heap->bins[i];
901 		heap_page *lastPage = NULL;
902 		heap_page *page = bin->page_list;
903 		lastFreeCount = 0;
904 		while (page) {
905 			area = heap->all_areas;
906 			while (area) {
907 				if (area == page->area)
908 					break;
909 				area = area->all_next;
910 			}
911 
912 			if (area == NULL) {
913 				panic("page area not present in area list\n");
914 				page = page->next;
915 				continue;
916 			}
917 
918 			if ((addr_t)page < (addr_t)&area->page_table[0]
919 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
920 				panic("used page is not part of the page table\n");
921 
922 			if (page->index >= area->page_count)
923 				panic("used page has invalid index\n");
924 
925 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
926 				panic("used page index does not lead to target page\n");
927 
928 			if (page->prev != lastPage) {
929 				panic("used page entry has invalid prev link (%p vs %p bin "
930 					"%lu)\n", page->prev, lastPage, i);
931 			}
932 
933 			if (!page->in_use)
934 				panic("used page marked as not in use\n");
935 
936 			if (page->bin_index != i) {
937 				panic("used page with bin index %u in page list of bin %lu\n",
938 					page->bin_index, i);
939 			}
940 
941 			if (page->free_count < lastFreeCount)
942 				panic("ordering of bin page list broken\n");
943 
944 			// validate the free list
945 			uint32 freeSlotsCount = 0;
946 			addr_t *element = page->free_list;
947 			addr_t pageBase = area->base + page->index * heap->page_size;
948 			while (element) {
949 				if ((addr_t)element < pageBase
950 					|| (addr_t)element >= pageBase + heap->page_size)
951 					panic("free list entry out of page range\n");
952 
953 				if (((addr_t)element - pageBase) % bin->element_size != 0)
954 					panic("free list entry not on a element boundary\n");
955 
956 				element = (addr_t *)*element;
957 				freeSlotsCount++;
958 			}
959 
960 			uint32 slotCount = bin->max_free_count;
961 			if (page->empty_index > slotCount) {
962 				panic("empty index beyond slot count (%u with %lu slots)\n",
963 					page->empty_index, slotCount);
964 			}
965 
966 			freeSlotsCount += (slotCount - page->empty_index);
967 			if (freeSlotsCount > slotCount)
968 				panic("more free slots than fit into the page\n");
969 
970 			lastPage = page;
971 			lastFreeCount = page->free_count;
972 			page = page->next;
973 		}
974 	}
975 
976 	pageLocker.Unlock();
977 	for (uint32 i = 0; i < heap->bin_count; i++)
978 		mutex_unlock(&heap->bins[i].lock);
979 	areaReadLocker.Unlock();
980 }
981 #endif // PARANOID_HEAP_VALIDATION
982 
983 
984 // #pragma mark - Heap functions
985 
986 
987 void
988 heap_add_area(heap_allocator *heap, area_id areaID, addr_t base, size_t size)
989 {
990 	heap_area *area = (heap_area *)base;
991 	area->area = areaID;
992 
993 	base += sizeof(heap_area);
994 	size -= sizeof(heap_area);
995 
996 	uint32 pageCount = size / heap->page_size;
997 	size_t pageTableSize = pageCount * sizeof(heap_page);
998 	area->page_table = (heap_page *)base;
999 	base += pageTableSize;
1000 	size -= pageTableSize;
1001 
1002 	// the rest is now actually usable memory (rounded to the next page)
1003 	area->base = ROUNDUP(base, B_PAGE_SIZE);
1004 	area->size = size & ~(B_PAGE_SIZE - 1);
1005 
1006 	// now we know the real page count
1007 	pageCount = area->size / heap->page_size;
1008 	area->page_count = pageCount;
1009 
1010 	// zero out the page table and fill in page indexes
1011 	memset((void *)area->page_table, 0, pageTableSize);
1012 	for (uint32 i = 0; i < pageCount; i++) {
1013 		area->page_table[i].area = area;
1014 		area->page_table[i].index = i;
1015 	}
1016 
1017 	// link all pages into the free pages list
1018 	for (uint32 i = 1; i < pageCount; i++) {
1019 		area->page_table[i - 1].next = &area->page_table[i];
1020 		area->page_table[i].prev = &area->page_table[i - 1];
1021 	}
1022 	area->free_pages = &area->page_table[0];
1023 	area->free_page_count = pageCount;
1024 	area->page_table[0].prev = NULL;
1025 	area->next = NULL;
1026 
1027 	WriteLocker areaWriteLocker(heap->area_lock);
1028 	MutexLocker pageLocker(heap->page_lock);
1029 	if (heap->areas == NULL) {
1030 		// it's the only (empty) area in that heap
1031 		area->prev = NULL;
1032 		heap->areas = area;
1033 	} else {
1034 		// link in this area as the last one as it is completely empty
1035 		heap_area *lastArea = heap->areas;
1036 		while (lastArea->next != NULL)
1037 			lastArea = lastArea->next;
1038 
1039 		lastArea->next = area;
1040 		area->prev = lastArea;
1041 	}
1042 
1043 	// insert this area in the all_areas list so it stays ordered by base
1044 	if (heap->all_areas == NULL || heap->all_areas->base < area->base) {
1045 		area->all_next = heap->all_areas;
1046 		heap->all_areas = area;
1047 	} else {
1048 		heap_area *insert = heap->all_areas;
1049 		while (insert->all_next && insert->all_next->base > area->base)
1050 			insert = insert->all_next;
1051 
1052 		area->all_next = insert->all_next;
1053 		insert->all_next = area;
1054 	}
1055 
1056 	heap->total_pages += area->page_count;
1057 	heap->total_free_pages += area->free_page_count;
1058 
1059 	if (areaID >= 0) {
1060 		// this area can be deleted later and is still empty - the empty count
1061 		// will be decremented as soon as this area is used for the first time
1062 		heap->empty_areas++;
1063 	}
1064 
1065 	pageLocker.Unlock();
1066 	areaWriteLocker.Unlock();
1067 
1068 	dprintf("heap_add_area: area %ld added to %s heap %p - usable range 0x%08lx "
1069 		"- 0x%08lx\n", area->area, heap->name, heap, area->base,
1070 		area->base + area->size);
1071 }
1072 
1073 
1074 static status_t
1075 heap_remove_area(heap_allocator *heap, heap_area *area)
1076 {
1077 	if (area->free_page_count != area->page_count) {
1078 		panic("tried removing heap area that has still pages in use");
1079 		return B_ERROR;
1080 	}
1081 
1082 	if (area->prev == NULL && area->next == NULL) {
1083 		panic("tried removing the last non-full heap area");
1084 		return B_ERROR;
1085 	}
1086 
1087 	if (heap->areas == area)
1088 		heap->areas = area->next;
1089 	if (area->prev != NULL)
1090 		area->prev->next = area->next;
1091 	if (area->next != NULL)
1092 		area->next->prev = area->prev;
1093 
1094 	if (heap->all_areas == area)
1095 		heap->all_areas = area->all_next;
1096 	else {
1097 		heap_area *previous = heap->all_areas;
1098 		while (previous) {
1099 			if (previous->all_next == area) {
1100 				previous->all_next = area->all_next;
1101 				break;
1102 			}
1103 
1104 			previous = previous->all_next;
1105 		}
1106 
1107 		if (previous == NULL)
1108 			panic("removing heap area that is not in all list");
1109 	}
1110 
1111 	heap->total_pages -= area->page_count;
1112 	heap->total_free_pages -= area->free_page_count;
1113 
1114 	dprintf("heap_remove_area: area %ld with range 0x%08lx - 0x%08lx removed "
1115 		"from %s heap %p\n", area->area, area->base, area->base + area->size,
1116 		heap->name, heap);
1117 
1118 	return B_OK;
1119 }
1120 
1121 
1122 heap_allocator *
1123 heap_create_allocator(const char *name, addr_t base, size_t size,
1124 	const heap_class *heapClass, bool allocateOnHeap)
1125 {
1126 	heap_allocator *heap;
1127 	if (allocateOnHeap) {
1128 		// allocate separately on the heap
1129 		heap = (heap_allocator *)malloc(sizeof(heap_allocator)
1130 			+ sizeof(heap_bin) * MAX_BIN_COUNT);
1131 	} else {
1132 		// use up the first part of the area
1133 		heap = (heap_allocator *)base;
1134 		base += sizeof(heap_allocator);
1135 		size -= sizeof(heap_allocator);
1136 	}
1137 
1138 	heap->name = name;
1139 	heap->page_size = heapClass->page_size;
1140 	heap->total_pages = heap->total_free_pages = heap->empty_areas = 0;
1141 	heap->areas = heap->all_areas = NULL;
1142 	heap->bins = (heap_bin *)((addr_t)heap + sizeof(heap_allocator));
1143 
1144 #if KERNEL_HEAP_LEAK_CHECK
1145 	heap->get_caller = &get_caller;
1146 #endif
1147 
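	// Derive the bin sizes for this heap class: count runs from the largest
	// element count (page_size / min_bin_size) down to min_count_per_page;
	// each candidate element size is rounded down to the bin alignment, and
	// sizes that repeat or would waste more than max_waste_per_page bytes of
	// a page are skipped.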
1148 	heap->bin_count = 0;
1149 	size_t binSize = 0, lastSize = 0;
1150 	uint32 count = heap->page_size / heapClass->min_bin_size;
1151 	for (; count >= heapClass->min_count_per_page; count--, lastSize = binSize) {
1152 		if (heap->bin_count >= MAX_BIN_COUNT)
1153 			panic("heap configuration invalid - max bin count reached\n");
1154 
1155 		binSize = (heap->page_size / count) & ~(heapClass->bin_alignment - 1);
1156 		if (binSize == lastSize)
1157 			continue;
1158 		if (heap->page_size - count * binSize > heapClass->max_waste_per_page)
1159 			continue;
1160 
1161 		heap_bin *bin = &heap->bins[heap->bin_count];
1162 		mutex_init(&bin->lock, "heap bin lock");
1163 		bin->element_size = binSize;
1164 		bin->max_free_count = heap->page_size / binSize;
1165 		bin->page_list = NULL;
1166 		heap->bin_count++;
1167 	}
1168 
1169 	if (!allocateOnHeap) {
1170 		base += heap->bin_count * sizeof(heap_bin);
1171 		size -= heap->bin_count * sizeof(heap_bin);
1172 	}
1173 
1174 	rw_lock_init(&heap->area_lock, "heap area rw lock");
1175 	mutex_init(&heap->page_lock, "heap page lock");
1176 
1177 	heap_add_area(heap, -1, base, size);
1178 	return heap;
1179 }
1180 
1181 
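// The heap->areas list is kept sorted by ascending free_page_count, so new
// allocations are served from the fullest area first; this lets emptier
// areas drain and eventually become deletable. The two helpers below
// maintain that ordering (and the empty_areas counter) whenever pages are
// returned to or taken from an area.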
1182 static inline void
1183 heap_free_pages_added(heap_allocator *heap, heap_area *area, uint32 pageCount)
1184 {
1185 	area->free_page_count += pageCount;
1186 	heap->total_free_pages += pageCount;
1187 
1188 	if (area->free_page_count == pageCount) {
1189 		// we need to add ourselves to the area list of the heap
1190 		area->prev = NULL;
1191 		area->next = heap->areas;
1192 		if (area->next)
1193 			area->next->prev = area;
1194 		heap->areas = area;
1195 	} else {
1196 		// we might need to move back in the area list
1197 		if (area->next && area->next->free_page_count < area->free_page_count) {
1198 			// move ourselves so the list stays ordered
1199 			heap_area *insert = area->next;
1200 			while (insert->next
1201 				&& insert->next->free_page_count < area->free_page_count)
1202 				insert = insert->next;
1203 
1204 			if (area->prev)
1205 				area->prev->next = area->next;
1206 			if (area->next)
1207 				area->next->prev = area->prev;
1208 			if (heap->areas == area)
1209 				heap->areas = area->next;
1210 
1211 			area->prev = insert;
1212 			area->next = insert->next;
1213 			if (area->next)
1214 				area->next->prev = area;
1215 			insert->next = area;
1216 		}
1217 	}
1218 
1219 	if (area->free_page_count == area->page_count && area->area >= 0)
1220 		heap->empty_areas++;
1221 }
1222 
1223 
1224 static inline void
1225 heap_free_pages_removed(heap_allocator *heap, heap_area *area, uint32 pageCount)
1226 {
1227 	if (area->free_page_count == area->page_count && area->area >= 0) {
1228 		// this area was completely empty
1229 		heap->empty_areas--;
1230 	}
1231 
1232 	area->free_page_count -= pageCount;
1233 	heap->total_free_pages -= pageCount;
1234 
1235 	if (area->free_page_count == 0) {
1236 		// the area is now full so we remove it from the area list
1237 		if (area->prev)
1238 			area->prev->next = area->next;
1239 		if (area->next)
1240 			area->next->prev = area->prev;
1241 		if (heap->areas == area)
1242 			heap->areas = area->next;
1243 		area->next = area->prev = NULL;
1244 	} else {
1245 		// we might need to move forward in the area list
1246 		if (area->prev && area->prev->free_page_count > area->free_page_count) {
1247 			// move ourselves so the list stays ordered
1248 			heap_area *insert = area->prev;
1249 			while (insert->prev
1250 				&& insert->prev->free_page_count > area->free_page_count)
1251 				insert = insert->prev;
1252 
1253 			if (area->prev)
1254 				area->prev->next = area->next;
1255 			if (area->next)
1256 				area->next->prev = area->prev;
1257 
1258 			area->prev = insert->prev;
1259 			area->next = insert;
1260 			if (area->prev)
1261 				area->prev->next = area;
1262 			if (heap->areas == insert)
1263 				heap->areas = area;
1264 			insert->prev = area;
1265 		}
1266 	}
1267 }
1268 
1269 
1270 static inline void
1271 heap_link_page(heap_page *page, heap_page **list)
1272 {
1273 	page->prev = NULL;
1274 	page->next = *list;
1275 	if (page->next)
1276 		page->next->prev = page;
1277 	*list = page;
1278 }
1279 
1280 
1281 static inline void
1282 heap_unlink_page(heap_page *page, heap_page **list)
1283 {
1284 	if (page->prev)
1285 		page->prev->next = page->next;
1286 	if (page->next)
1287 		page->next->prev = page->prev;
1288 	if (list && *list == page) {
1289 		*list = page->next;
1290 		if (page->next)
1291 			page->next->prev = NULL;
1292 	}
1293 }
1294 
1295 
1296 static heap_page *
1297 heap_allocate_contiguous_pages(heap_allocator *heap, uint32 pageCount)
1298 {
1299 	MutexLocker pageLocker(heap->page_lock);
1300 	heap_area *area = heap->areas;
1301 	while (area) {
1302 		bool found = false;
1303 		int32 first = -1;
1304 		for (uint32 i = 0; i < area->page_count; i++) {
1305 			if (area->page_table[i].in_use) {
1306 				first = -1;
1307 				continue;
1308 			}
1309 
1310 			if (first < 0)
1311 				first = i;
1312 
1313 			if (first >= 0) {
1314 				if ((i + 1 - first) == pageCount) {
1315 					found = true;
1316 					break;
1317 				}
1318 			}
1319 		}
1320 
1321 		if (!found) {
1322 			area = area->next;
1323 			continue;
1324 		}
1325 
1326 		for (uint32 i = first; i < first + pageCount; i++) {
1327 			heap_page *page = &area->page_table[i];
1328 			page->in_use = 1;
1329 			page->bin_index = heap->bin_count;
1330 
1331 			heap_unlink_page(page, &area->free_pages);
1332 
1333 			page->next = page->prev = NULL;
1334 			page->free_list = NULL;
1335 			page->allocation_id = (uint16)first;
1336 		}
1337 
1338 		heap_free_pages_removed(heap, area, pageCount);
1339 		return &area->page_table[first];
1340 	}
1341 
1342 	return NULL;
1343 }
1344 
1345 
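// heap_raw_alloc() serves a request either from the bin with the given
// index, or - when binIndex equals the heap's bin_count, i.e. the size is
// too large for any bin - from a run of contiguous free pages found by
// heap_allocate_contiguous_pages().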
1346 static void *
1347 heap_raw_alloc(heap_allocator *heap, size_t size, uint32 binIndex)
1348 {
1349 	TRACE(("raw_alloc: heap %p; size %lu; bin %lu\n", heap, size, binIndex));
1350 	if (binIndex < heap->bin_count) {
1351 		heap_bin *bin = &heap->bins[binIndex];
1352 		MutexLocker binLocker(bin->lock);
1353 
1354 		heap_page *page = bin->page_list;
1355 		if (page == NULL) {
1356 			MutexLocker pageLocker(heap->page_lock);
1357 			heap_area *area = heap->areas;
1358 			if (area == NULL) {
1359 				TRACE(("heap %p: no free pages to allocate %lu bytes\n", heap,
1360 					size));
1361 				return NULL;
1362 			}
1363 
1364 			// by design there are only areas in the list that still have
1365 			// free pages available
1366 			page = area->free_pages;
1367 			area->free_pages = page->next;
1368 			if (page->next)
1369 				page->next->prev = NULL;
1370 
1371 			heap_free_pages_removed(heap, area, 1);
1372 
1373 			if (page->in_use)
1374 				panic("got an in use page from the free pages list\n");
1375 			page->in_use = 1;
1376 
1377 			pageLocker.Unlock();
1378 
1379 			page->bin_index = binIndex;
1380 			page->free_count = bin->max_free_count;
1381 			page->empty_index = 0;
1382 			page->free_list = NULL;
1383 			page->next = page->prev = NULL;
1384 			bin->page_list = page;
1385 		}
1386 
1387 		// we have a page where we have a free slot
1388 		void *address = NULL;
1389 		if (page->free_list) {
1390 			// there's a previously freed entry we can use
1391 			address = page->free_list;
1392 			page->free_list = (addr_t *)*page->free_list;
1393 		} else {
1394 			// the page hasn't been fully allocated so use the next empty_index
1395 			address = (void *)(page->area->base + page->index * heap->page_size
1396 				+ page->empty_index * bin->element_size);
1397 			page->empty_index++;
1398 		}
1399 
1400 		page->free_count--;
1401 		if (page->free_count == 0) {
1402 			// the page is now full so we remove it from the page_list
1403 			bin->page_list = page->next;
1404 			if (page->next)
1405 				page->next->prev = NULL;
1406 			page->next = page->prev = NULL;
1407 		}
1408 
1409 		binLocker.Unlock();
1410 
1411 #if KERNEL_HEAP_LEAK_CHECK
1412 		heap_leak_check_info *info = (heap_leak_check_info *)((addr_t)address
1413 			+ bin->element_size - sizeof(heap_leak_check_info));
1414 		info->size = size - sizeof(heap_leak_check_info);
1415 		info->thread = (gKernelStartup ? 0 : thread_get_current_thread_id());
1416 		info->team = (gKernelStartup ? 0 : team_get_current_team_id());
1417 		info->caller = heap->get_caller();
1418 #endif
1419 		return address;
1420 	}
1421 
1422 	uint32 pageCount = (size + heap->page_size - 1) / heap->page_size;
1423 	heap_page *firstPage = heap_allocate_contiguous_pages(heap, pageCount);
1424 	if (firstPage == NULL) {
1425 		TRACE(("heap %p: found no contiguous pages to allocate %ld bytes\n",
1426 			heap, size));
1427 		return NULL;
1428 	}
1429 
1430 #if KERNEL_HEAP_LEAK_CHECK
1431 	heap_leak_check_info *info = (heap_leak_check_info *)(firstPage->area->base
1432 		+ (firstPage->index + pageCount) * heap->page_size
1433 		- sizeof(heap_leak_check_info));
1434 	info->size = size - sizeof(heap_leak_check_info);
1435 	info->thread = (gKernelStartup ? 0 : thread_get_current_thread_id());
1436 	info->team = (gKernelStartup ? 0 : team_get_current_team_id());
1437 	info->caller = heap->get_caller();
1438 #endif
1439 	return (void *)(firstPage->area->base + firstPage->index * heap->page_size);
1440 }
1441 
1442 
1443 #if DEBUG
1444 static bool
1445 is_valid_alignment(size_t number)
1446 {
1447 	// this cryptic line accepts zero and all powers of two
1448 	return ((~number + 1) | ((number << 1) - 1)) == ~0UL;
1449 }
1450 #endif
1451 
1452 
1453 inline bool
1454 heap_should_grow(heap_allocator *heap)
1455 {
1456 	// suggest growing if there is less than 20% of a grow size available
1457 	return heap->total_free_pages * heap->page_size < HEAP_GROW_SIZE / 5;
1458 }
1459 
1460 
1461 void *
1462 heap_memalign(heap_allocator *heap, size_t alignment, size_t size)
1463 {
1464 	TRACE(("memalign(alignment = %lu, size = %lu)\n", alignment, size));
1465 
1466 #if DEBUG
1467 	if (!is_valid_alignment(alignment))
1468 		panic("memalign() with an alignment which is not a power of 2\n");
1469 #endif
1470 
1471 #if KERNEL_HEAP_LEAK_CHECK
1472 	size += sizeof(heap_leak_check_info);
1473 #endif
1474 
1475 	if (alignment != 0) {
1476 		// TODO: The alignment is done by ensuring that the element size
1477 		// of the target bin is aligned with the requested alignment. This
1478 		// has the problem that it wastes space because a better (smaller) bin
1479 		// could possibly be selected. We should pick the best bin and check
1480 		// if there is an aligned block in the free list or if a new (page
1481 		// aligned) page has to be allocated anyway.
1482 		size = ROUNDUP(size, alignment);
1483 	}
1484 
1485 	uint32 binIndex;
1486 	for (binIndex = 0; binIndex < heap->bin_count; binIndex++) {
1487 		if (size <= heap->bins[binIndex].element_size)
1488 			break;
1489 	}
1490 
1491 	void *address = heap_raw_alloc(heap, size, binIndex);
1492 
1493 #if KERNEL_HEAP_LEAK_CHECK
1494 	size -= sizeof(heap_leak_check_info);
1495 #endif
1496 
1497 	TRACE(("memalign(): asked to allocate %lu bytes, returning pointer %p\n",
1498 		size, address));
1499 
1500 	T(Allocate((addr_t)address, size));
1501 	if (address == NULL)
1502 		return address;
1503 
1504 #if PARANOID_KERNEL_MALLOC
1505 	memset(address, 0xcc, size);
1506 #endif
1507 
1508 #if PARANOID_KERNEL_FREE
1509 	// make sure 0xdeadbeef is cleared if we do not overwrite the memory
1510 	// and the user does not clear it
1511 	if (((uint32 *)address)[1] == 0xdeadbeef)
1512 		((uint32 *)address)[1] = 0xcccccccc;
1513 #endif
1514 
1515 	return address;
1516 }
1517 
1518 
1519 status_t
1520 heap_free(heap_allocator *heap, void *address)
1521 {
1522 	if (address == NULL)
1523 		return B_OK;
1524 
1525 	ReadLocker areaReadLocker(heap->area_lock);
1526 	heap_area *area = heap->all_areas;
1527 	while (area) {
1528 		// since the all_areas list is ordered by base with the biggest
1529 		// base at the top, we need only find the first area with a base
1530 		// smaller than our address to become our only candidate for freeing
1531 		if (area->base <= (addr_t)address) {
1532 			if ((addr_t)address >= area->base + area->size) {
1533 				// none of the other areas can contain the address as the list
1534 				// is ordered
1535 				return B_ENTRY_NOT_FOUND;
1536 			}
1537 
1538 			// this area contains the allocation, we're done searching
1539 			break;
1540 		}
1541 
1542 		area = area->all_next;
1543 	}
1544 
1545 	if (area == NULL) {
1546 		// this address does not belong to us
1547 		return B_ENTRY_NOT_FOUND;
1548 	}
1549 
1550 	TRACE(("free(): asked to free pointer %p\n", address));
1551 
1552 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1553 		/ heap->page_size];
1554 
1555 	TRACE(("free(): page %p: bin_index %d, free_count %d\n", page,
1556 		page->bin_index, page->free_count));
1557 
1558 	if (page->bin_index > heap->bin_count) {
1559 		panic("free(): page %p: invalid bin_index %d\n", page, page->bin_index);
1560 		return B_ERROR;
1561 	}
1562 
1563 	if (page->bin_index < heap->bin_count) {
1564 		// small allocation
1565 		heap_bin *bin = &heap->bins[page->bin_index];
1566 
1567 #if PARANOID_KERNEL_FREE
1568 		if (((uint32 *)address)[1] == 0xdeadbeef) {
1569 			// This block looks like it was freed already, walk the free list
1570 			// on this page to make sure this address doesn't exist.
1571 			MutexLocker binLocker(bin->lock);
1572 			for (addr_t *temp = page->free_list; temp != NULL;
1573 					temp = (addr_t *)*temp) {
1574 				if (temp == address) {
1575 					panic("free(): address %p already exists in page free "
1576 						"list\n", address);
1577 					return B_ERROR;
1578 				}
1579 			}
1580 		}
1581 
1582 		// the first 4 bytes are overwritten with the next free list pointer
1583 		// later
1584 		uint32 *dead = (uint32 *)address;
1585 		for (uint32 i = 1; i < bin->element_size / sizeof(uint32); i++)
1586 			dead[i] = 0xdeadbeef;
1587 #endif
1588 
1589 		MutexLocker binLocker(bin->lock);
1590 		if (((addr_t)address - area->base - page->index
1591 			* heap->page_size) % bin->element_size != 0) {
1592 			panic("free(): passed invalid pointer %p supposed to be in bin for "
1593 				"element size %ld\n", address, bin->element_size);
1594 			return B_ERROR;
1595 		}
1596 
1597 		// add the address to the page free list
1598 		*(addr_t *)address = (addr_t)page->free_list;
1599 		page->free_list = (addr_t *)address;
1600 		page->free_count++;
1601 
1602 		if (page->free_count == bin->max_free_count) {
1603 			// we are now empty, remove the page from the bin list
1604 			MutexLocker pageLocker(heap->page_lock);
1605 			heap_unlink_page(page, &bin->page_list);
1606 			page->in_use = 0;
1607 			heap_link_page(page, &area->free_pages);
1608 			heap_free_pages_added(heap, area, 1);
1609 		} else if (page->free_count == 1) {
1610 			// we need to add ourselves to the page list of the bin
1611 			heap_link_page(page, &bin->page_list);
1612 		} else {
1613 			// we might need to move back in the free pages list
1614 			if (page->next && page->next->free_count < page->free_count) {
1615 				// move ourselves so the list stays ordered
1616 				heap_page *insert = page->next;
1617 				while (insert->next
1618 					&& insert->next->free_count < page->free_count)
1619 					insert = insert->next;
1620 
1621 				heap_unlink_page(page, &bin->page_list);
1622 
1623 				page->prev = insert;
1624 				page->next = insert->next;
1625 				if (page->next)
1626 					page->next->prev = page;
1627 				insert->next = page;
1628 			}
1629 		}
1630 	} else {
1631 		// large allocation, just return the pages to the page free list
1632 		uint32 allocationID = page->allocation_id;
1633 		uint32 maxPages = area->page_count - page->index;
1634 		uint32 pageCount = 0;
1635 
1636 		MutexLocker pageLocker(heap->page_lock);
1637 		for (uint32 i = 0; i < maxPages; i++) {
1638 			// loop until we find the end of this allocation
1639 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1640 				|| page[i].allocation_id != allocationID)
1641 				break;
1642 
1643 			// this page still belongs to the same allocation
1644 			page[i].in_use = 0;
1645 			page[i].allocation_id = 0;
1646 
1647 			// return it to the free list
1648 			heap_link_page(&page[i], &area->free_pages);
1649 			pageCount++;
1650 		}
1651 
1652 		heap_free_pages_added(heap, area, pageCount);
1653 	}
1654 
1655 	T(Free((addr_t)address));
1656 	areaReadLocker.Unlock();
1657 
1658 	if (heap->empty_areas > 1) {
1659 		WriteLocker areaWriteLocker(heap->area_lock);
1660 		MutexLocker pageLocker(heap->page_lock);
1661 
1662 		area = heap->areas;
1663 		while (area != NULL && heap->empty_areas > 1) {
1664 			heap_area *next = area->next;
1665 			if (area->area >= 0
1666 				&& area->free_page_count == area->page_count
1667 				&& heap_remove_area(heap, area) == B_OK) {
1668 				delete_area(area->area);
1669 				heap->empty_areas--;
1670 			}
1671 
1672 			area = next;
1673 		}
1674 	}
1675 
1676 	return B_OK;
1677 }
1678 
1679 
1680 #if KERNEL_HEAP_LEAK_CHECK
1681 extern "C" void
1682 heap_set_get_caller(heap_allocator* heap, addr_t (*getCaller)())
1683 {
1684 	heap->get_caller = getCaller;
1685 }
1686 #endif
1687 
1688 
1689 static status_t
1690 heap_realloc(heap_allocator *heap, void *address, void **newAddress,
1691 	size_t newSize)
1692 {
1693 	ReadLocker areaReadLocker(heap->area_lock);
1694 	heap_area *area = heap->all_areas;
1695 	while (area) {
1696 		// since the all_areas list is ordered by base with the biggest
1697 		// base at the top, we need only find the first area with a base
1698 		// smaller than our address to become our only candidate for
1699 		// reallocating
1700 		if (area->base <= (addr_t)address) {
1701 			if ((addr_t)address >= area->base + area->size) {
1702 				// none of the other areas can contain the address as the list
1703 				// is ordered
1704 				return B_ENTRY_NOT_FOUND;
1705 			}
1706 
1707 			// this area contains the allocation, we're done searching
1708 			break;
1709 		}
1710 
1711 		area = area->all_next;
1712 	}
1713 
1714 	if (area == NULL) {
1715 		// this address does not belong to us
1716 		return B_ENTRY_NOT_FOUND;
1717 	}
1718 
1719 	TRACE(("realloc(address = %p, newSize = %lu)\n", address, newSize));
1720 
1721 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1722 		/ heap->page_size];
1723 	if (page->bin_index > heap->bin_count) {
1724 		panic("realloc(): page %p: invalid bin_index %d\n", page,
1725 			page->bin_index);
1726 		return B_ERROR;
1727 	}
1728 
1729 	// find out the size of the old allocation first
1730 	size_t minSize = 0;
1731 	size_t maxSize = 0;
1732 	if (page->bin_index < heap->bin_count) {
1733 		// this was a small allocation
1734 		heap_bin *bin = &heap->bins[page->bin_index];
1735 		maxSize = bin->element_size;
1736 		if (page->bin_index > 0)
1737 			minSize = heap->bins[page->bin_index - 1].element_size + 1;
1738 	} else {
1739 		// this was a large allocation
1740 		uint32 allocationID = page->allocation_id;
1741 		uint32 maxPages = area->page_count - page->index;
1742 		maxSize = heap->page_size;
1743 
1744 		MutexLocker pageLocker(heap->page_lock);
1745 		for (uint32 i = 1; i < maxPages; i++) {
1746 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1747 				|| page[i].allocation_id != allocationID)
1748 				break;
1749 
1750 			minSize += heap->page_size;
1751 			maxSize += heap->page_size;
1752 		}
1753 	}
1754 
1755 	areaReadLocker.Unlock();
1756 
1757 #if KERNEL_HEAP_LEAK_CHECK
1758 	newSize += sizeof(heap_leak_check_info);
1759 #endif
1760 
1761 	// does the new allocation simply fit in the old allocation?
1762 	if (newSize > minSize && newSize <= maxSize) {
1763 #if KERNEL_HEAP_LEAK_CHECK
1764 		// update the size info (the info is at the end so stays where it is)
1765 		heap_leak_check_info *info = (heap_leak_check_info *)((addr_t)address
1766 			+ maxSize - sizeof(heap_leak_check_info));
1767 		info->size = newSize - sizeof(heap_leak_check_info);
1768 		newSize -= sizeof(heap_leak_check_info);
1769 #endif
1770 
1771 		T(Reallocate((addr_t)address, (addr_t)address, newSize));
1772 		*newAddress = address;
1773 		return B_OK;
1774 	}
1775 
1776 #if KERNEL_HEAP_LEAK_CHECK
1777 	// new leak check info will be created with the malloc below
1778 	newSize -= sizeof(heap_leak_check_info);
1779 #endif
1780 
1781 	// if not, allocate a new chunk of memory
1782 	*newAddress = memalign(0, newSize);
1783 	T(Reallocate((addr_t)address, (addr_t)*newAddress, newSize));
1784 	if (*newAddress == NULL) {
1785 		// the allocation failed; the operation is still done, *newAddress is NULL
1786 		return B_OK;
1787 	}
1788 
1789 	// copy the old data and free the old allocation
1790 	memcpy(*newAddress, address, min_c(maxSize, newSize));
1791 	heap_free(heap, address);
1792 	return B_OK;
1793 }
1794 
1795 
1796 inline uint32
1797 heap_index_for(size_t size, int32 cpu)
1798 {
1799 #if KERNEL_HEAP_LEAK_CHECK
1800 	// take the extra info size into account
1801 	size += sizeof(heap_leak_check_info_s);
1802 #endif
1803 
1804 	uint32 index = 0;
1805 	for (; index < HEAP_CLASS_COUNT - 1; index++) {
1806 		if (size <= sHeapClasses[index].max_allocation_size)
1807 			break;
1808 	}
1809 
1810 	return index + cpu * HEAP_CLASS_COUNT;
1811 }
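// Illustrative only (the real callers are the malloc()/memalign()
// implementations later in this file): a caller would typically pick its
// per-CPU allocator along the lines of
//
//	heap_allocator *heap = sHeaps[heap_index_for(size, smp_get_current_cpu())];
//
// and fall back to another heap (or wait for the grow thread) if
// heap_memalign() on that allocator returns NULL.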
1812 
1813 
1814 static void
1815 deferred_deleter(void *arg, int iteration)
1816 {
1817 	// move entries and deletables to on-stack lists
1818 	InterruptsSpinLocker locker(sDeferredFreeListLock);
1819 	if (sDeferredFreeList.IsEmpty() && sDeferredDeletableList.IsEmpty())
1820 		return;
1821 
1822 	DeferredFreeList entries;
1823 	entries.MoveFrom(&sDeferredFreeList);
1824 
1825 	DeferredDeletableList deletables;
1826 	deletables.MoveFrom(&sDeferredDeletableList);
1827 
1828 	locker.Unlock();
1829 
1830 	// free the entries
1831 	while (DeferredFreeListEntry* entry = entries.RemoveHead())
1832 		free(entry);
1833 
1834 	// delete the deletables
1835 	while (DeferredDeletable* deletable = deletables.RemoveHead())
1836 		delete deletable;
1837 }
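// deferred_deleter() is presumably registered with the kernel daemon in the
// init code later in this file. It lets code that must not call free() or
// operator delete directly - for example while holding locks the heap itself
// needs - queue memory on the two deferred lists instead and have it
// released here from a safe context.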
1838 
1839 
1840 //	#pragma mark -
1841 
1842 
1843 static status_t
1844 heap_create_new_heap_area(heap_allocator *heap, const char *name, size_t size)
1845 {
1846 	void *address = NULL;
1847 	area_id heapArea = create_area(name, &address,
1848 		B_ANY_KERNEL_BLOCK_ADDRESS, size, B_FULL_LOCK,
1849 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1850 	if (heapArea < B_OK) {
1851 		TRACE(("heap: couldn't allocate heap area \"%s\"\n", name));
1852 		return heapArea;
1853 	}
1854 
1855 	heap_add_area(heap, heapArea, (addr_t)address, size);
1856 #if PARANOID_HEAP_VALIDATION
1857 	heap_validate_heap(heap);
1858 #endif
1859 	return B_OK;
1860 }
1861 
1862 
1863 static int32
1864 heap_grow_thread(void *)
1865 {
1866 	while (true) {
1867 		// wait for a request to grow the heap list
1868 		if (acquire_sem(sHeapGrowSem) < B_OK)
1869 			continue;
1870 
1871 		if (sAddGrowHeap) {
1872 			// the grow heap is going to run full soon, try to allocate a new
1873 			// one to make some room.
1874 			TRACE(("heap_grower: grow heaps will run out of memory soon\n"));
1875 			if (heap_create_new_heap_area(sGrowHeap, "additional grow heap",
1876 					HEAP_DEDICATED_GROW_SIZE) != B_OK)
1877 				dprintf("heap_grower: failed to create new grow heap area\n");
1878 		}
1879 
1880 		for (uint32 i = 0; i < sHeapCount; i++) {
1881 			heap_allocator *heap = sHeaps[i];
1882 			if (sLastGrowRequest[i] > sLastHandledGrowRequest[i]
1883 				|| heap_should_grow(heap)) {
1884 				// grow this heap if it is nearly full or if a grow was
1885 				// explicitly requested for this heap (happens when a large
1886 				// allocation cannot be fulfilled due to lack of contiguous
1887 				// pages)
1888 				if (heap_create_new_heap_area(heap, "additional heap",
1889 						HEAP_GROW_SIZE) != B_OK)
1890 					dprintf("heap_grower: failed to create new heap area\n");
1891 				sLastHandledGrowRequest[i] = sLastGrowRequest[i];
1892 			}
1893 		}
1894 
1895 		// notify anyone waiting for this request
1896 		release_sem_etc(sHeapGrownNotify, -1, B_RELEASE_ALL);
1897 	}
1898 
1899 	return 0;
1900 }
1901 
1902 
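// Creates the initial heap allocators (one per heap class) on the memory
// range starting at base and registers the heap-related debugger commands.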
1903 status_t
1904 heap_init(addr_t base, size_t size)
1905 {
1906 	for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) {
1907 		size_t partSize = size * sHeapClasses[i].initial_percentage / 100;
1908 		sHeaps[i] = heap_create_allocator(sHeapClasses[i].name, base, partSize,
1909 			&sHeapClasses[i], false);
1910 		sLastGrowRequest[i] = sLastHandledGrowRequest[i] = 0;
1911 		base += partSize;
1912 		sHeapCount++;
1913 	}
1914 
1915 	// set up some debug commands
1916 	add_debugger_command_etc("heap", &dump_heap_list,
1917 		"Dump info about the kernel heap(s)",
1918 		"[(\"grow\" | \"stats\" | <heap>)]\n"
1919 		"Dump info about the kernel heap(s). If \"grow\" is specified, only\n"
1920 		"info about the dedicated grow heap is printed. If \"stats\" is\n"
1921 		"given as the argument, currently only the heap count is printed.\n"
1922 		"If <heap> is given, it is interpreted as the address of the heap to\n"
1923 		"print info about.\n", 0);
1924 #if !KERNEL_HEAP_LEAK_CHECK
1925 	add_debugger_command_etc("allocations", &dump_allocations,
1926 		"Dump current heap allocations",
1927 		"[\"stats\"] [<heap>]\n"
1928 		"If no parameters are given, all current allocations are dumped.\n"
1929 		"If the optional argument \"stats\" is specified, only the allocation\n"
1930 		"counts and no individual allocations are printed.\n"
1931 		"If a specific heap address is given, only allocations of this\n"
1932 		"allocator are dumped.\n", 0);
1933 #else // !KERNEL_HEAP_LEAK_CHECK
1934 	add_debugger_command_etc("allocations", &dump_allocations,
1935 		"Dump current heap allocations",
1936 		"[(\"team\" | \"thread\") <id>] [\"caller\" <address>] [\"address\" <address>] [\"stats\"]\n"
1937 		"If no parameters are given, all current allocations are dumped.\n"
1938 		"If \"team\", \"thread\", \"caller\", and/or \"address\" is specified as the first\n"
1939 		"argument, only allocations matching the team ID, thread ID, caller\n"
1940 		"address or allocated address given in the second argument are printed.\n"
1941 		"If the optional argument \"stats\" is specified, only the allocation\n"
1942 		"counts and no individual allocations are printed.\n", 0);
1943 	add_debugger_command_etc("allocations_per_caller",
1944 		&dump_allocations_per_caller,
1945 		"Dump current heap allocations summed up per caller",
1946 		"[ \"-c\" ] [ -h <heap> ]\n"
1947 		"The current allocations will be summed up by caller (their count\n"
1948 		"and size) and printed in decreasing order by size or, if \"-c\" is\n"
1949 		"specified, by allocation count. If given, <heap> specifies the\n"
1950 		"address of the heap for which to print the allocations.\n", 0);
1951 #endif // KERNEL_HEAP_LEAK_CHECK
1952 	return B_OK;
1953 }
1954 
1955 
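// Second init stage, called once semaphores are available: blocks the
// address ranges used by the 0xcccccccc/0xdeadbeef debug patterns (if the
// respective paranoid options are enabled) and creates the semaphores used
// to communicate with the heap grow thread.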
1956 status_t
1957 heap_init_post_sem()
1958 {
1959 #if PARANOID_KERNEL_MALLOC
1960 	vm_block_address_range("uninitialized heap memory",
1961 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
1962 #endif
1963 #if PARANOID_KERNEL_FREE
1964 	vm_block_address_range("freed heap memory",
1965 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
1966 #endif
1967 
1968 	sHeapGrowSem = create_sem(0, "heap_grow_sem");
1969 	if (sHeapGrowSem < 0) {
1970 		panic("heap_init_post_sem(): failed to create heap grow sem\n");
1971 		return B_ERROR;
1972 	}
1973 
1974 	sHeapGrownNotify = create_sem(0, "heap_grown_notify");
1975 	if (sHeapGrownNotify < 0) {
1976 		panic("heap_init_post_sem(): failed to create heap grown notify sem\n");
1977 		return B_ERROR;
1978 	}
1979 
1980 	return B_OK;
1981 }
1982 
1983 
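// Final init stage, called once threads are available: sets up the dedicated
// grow heap and the grow thread, creates per-CPU heaps if there is enough
// memory, and registers the deferred deleter as a kernel daemon.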
1984 status_t
1985 heap_init_post_thread()
1986 {
1987 	void *address = NULL;
1988 	area_id growHeapArea = create_area("dedicated grow heap", &address,
1989 		B_ANY_KERNEL_BLOCK_ADDRESS, HEAP_DEDICATED_GROW_SIZE, B_FULL_LOCK,
1990 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1991 	if (growHeapArea < 0) {
1992 		panic("heap_init_post_thread(): couldn't allocate dedicated grow heap "
1993 			"area");
1994 		return growHeapArea;
1995 	}
1996 
1997 	sGrowHeap = heap_create_allocator("grow", (addr_t)address,
1998 		HEAP_DEDICATED_GROW_SIZE, &sHeapClasses[0], false);
1999 	if (sGrowHeap == NULL) {
2000 		panic("heap_init_post_thread(): failed to create dedicated grow heap\n");
2001 		return B_ERROR;
2002 	}
2003 
2004 	sHeapGrowThread = spawn_kernel_thread(heap_grow_thread, "heap grower",
2005 		B_URGENT_PRIORITY, NULL);
2006 	if (sHeapGrowThread < 0) {
2007 		panic("heap_init_post_thread(): cannot create heap grow thread\n");
2008 		return sHeapGrowThread;
2009 	}
2010 
2011 	// create per-cpu heaps if there's enough memory
2012 	int32 heapCount = MIN(smp_get_num_cpus(),
2013 		(int32)vm_page_num_pages() / 60 / 1024);
2014 	for (int32 i = 1; i < heapCount; i++) {
2015 		addr_t base = 0;
2016 		size_t size = HEAP_GROW_SIZE * HEAP_CLASS_COUNT;
2017 		area_id perCPUHeapArea = create_area("per cpu initial heap",
2018 			(void **)&base, B_ANY_KERNEL_ADDRESS, size, B_FULL_LOCK,
2019 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2020 		if (perCPUHeapArea < 0)
2021 			break;
2022 
2023 		for (uint32 j = 0; j < HEAP_CLASS_COUNT; j++) {
2024 			int32 heapIndex = i * HEAP_CLASS_COUNT + j;
2025 			size_t partSize = size * sHeapClasses[j].initial_percentage / 100;
2026 			sHeaps[heapIndex] = heap_create_allocator(sHeapClasses[j].name,
2027 				base, partSize, &sHeapClasses[j], false);
2028 			sLastGrowRequest[heapIndex] = 0;
2029 			sLastHandledGrowRequest[heapIndex] = 0;
2030 			base += partSize;
2031 			sHeapCount++;
2032 		}
2033 	}
2034 
2035 	// run the deferred deleter roughly once a second
2036 	if (register_kernel_daemon(deferred_deleter, NULL, 10) != B_OK)
2037 		panic("heap_init_post_thread(): failed to init deferred deleter");
2038 
2039 	send_signal_etc(sHeapGrowThread, SIGCONT, B_DO_NOT_RESCHEDULE);
2040 	return B_OK;
2041 }
2042 
2043 
2044 //	#pragma mark - Public API
2045 
2046 
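// Allocates size bytes aligned to the given alignment (0 means no special
// alignment). Allocations larger than HEAP_AREA_USE_THRESHOLD get their own
// dedicated area; everything else is served from the per-CPU heaps, waking
// up or waiting for the grow thread when a heap runs low.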
2047 void *
2048 memalign(size_t alignment, size_t size)
2049 {
2050 	if (!gKernelStartup && !are_interrupts_enabled()) {
2051 		panic("memalign(): called with interrupts disabled\n");
2052 		return NULL;
2053 	}
2054 
2055 	if (!gKernelStartup && size > HEAP_AREA_USE_THRESHOLD) {
2056 		// don't even attempt such a huge allocation - use areas instead
2057 		size_t areaSize = ROUNDUP(size + sizeof(area_allocation_info)
2058 			+ alignment, B_PAGE_SIZE);
2059 		if (areaSize < size) {
2060 			// the size overflowed
2061 			return NULL;
2062 		}
2063 
2064 		void *address = NULL;
2065 		area_id allocationArea = create_area("memalign area", &address,
2066 			B_ANY_KERNEL_BLOCK_ADDRESS, areaSize, B_FULL_LOCK,
2067 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2068 		if (allocationArea < B_OK) {
2069 			dprintf("heap: failed to create area for huge allocation\n");
2070 			return NULL;
2071 		}
2072 
2073 		area_allocation_info *info = (area_allocation_info *)address;
2074 		info->magic = kAreaAllocationMagic;
2075 		info->area = allocationArea;
2076 		info->base = address;
2077 		info->size = areaSize;
2078 		info->allocation_size = size;
2079 		info->allocation_alignment = alignment;
2080 
2081 		address = (void *)((addr_t)address + sizeof(area_allocation_info));
2082 		if (alignment != 0) {
2083 			address = (void *)ROUNDUP((addr_t)address, alignment);
2084 			ASSERT((addr_t)address % alignment == 0);
2085 			ASSERT((addr_t)address + size - 1 < (addr_t)info + areaSize - 1);
2086 		}
2087 
2088 		TRACE(("heap: allocated area %ld for huge allocation of %lu bytes\n",
2089 			allocationArea, size));
2090 
2091 		info->allocation_base = address;
2092 
2093 #if PARANOID_KERNEL_MALLOC
2094 		memset(address, 0xcc, size);
2095 #endif
2096 		return address;
2097 	}
2098 
2099 	void *result = NULL;
2100 	bool shouldGrow = false;
2101 	heap_allocator *heap = NULL;
2102 	int32 cpuCount = MIN(smp_get_num_cpus(),
2103 		(int32)sHeapCount / HEAP_CLASS_COUNT);
2104 	int32 cpuNumber = smp_get_current_cpu();
2105 	for (int32 i = 0; i < cpuCount; i++) {
2106 		heap = sHeaps[heap_index_for(size, cpuNumber++ % cpuCount)];
2107 		result = heap_memalign(heap, alignment, size);
2108 		if (result != NULL) {
2109 			shouldGrow = heap_should_grow(heap);
2110 			break;
2111 		}
2112 	}
2113 
2114 	if (result == NULL) {
2115 		// request an urgent grow and wait - we don't grow the heap ourselves
2116 		// here, to serialize growing through the grow thread; otherwise
2117 		// multiple threads hitting this situation (likely when memory has
2118 		// run out) would all add areas
2119 		uint32 heapIndex = heap_index_for(size, smp_get_current_cpu());
2120 		sLastGrowRequest[heapIndex]++;
2121 		switch_sem(sHeapGrowSem, sHeapGrownNotify);
2122 
2123 		// and then try again
2124 		result = heap_memalign(sHeaps[heapIndex], alignment, size);
2125 	} else if (shouldGrow) {
2126 		// should grow sometime soon, notify the grower
2127 		release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
2128 	}
2129 
2130 #if PARANOID_HEAP_VALIDATION
2131 	heap_validate_heap(heap);
2132 #endif
2133 
2134 	if (result == NULL)
2135 		panic("heap: kernel heap has run out of memory\n");
2136 	return result;
2137 }
2138 
2139 
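// Variant of memalign() that never blocks on the grow thread. When called
// from the grow thread itself it allocates from the dedicated grow heap
// (requesting a new grow heap if that one runs low); otherwise it simply
// tries the existing heaps and returns NULL if they are exhausted.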
2140 void *
2141 memalign_nogrow(size_t alignment, size_t size)
2142 {
2143 	// the grow thread allocates from its dedicated grow heap first
2144 	if (thread_get_current_thread_id() == sHeapGrowThread) {
2145 		void *result = heap_memalign(sGrowHeap, alignment, size);
2146 		if (!sAddGrowHeap && heap_should_grow(sGrowHeap)) {
2147 			// hopefully the heap grower will manage to create a new heap
2148 			// before running out of private memory...
2149 			dprintf("heap: requesting new grow heap\n");
2150 			sAddGrowHeap = true;
2151 			release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
2152 		}
2153 
2154 		if (result != NULL)
2155 			return result;
2156 	}
2157 
2158 	// try the public heaps; there might be something available
2159 	void *result = NULL;
2160 	int32 cpuCount = MIN(smp_get_num_cpus(),
2161 		(int32)sHeapCount / HEAP_CLASS_COUNT);
2162 	int32 cpuNumber = smp_get_current_cpu();
2163 	for (int32 i = 0; i < cpuCount; i++) {
2164 		uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount);
2165 		heap_allocator *heap = sHeaps[heapIndex];
2166 		result = heap_memalign(heap, alignment, size);
2167 		if (result != NULL)
2168 			return result;
2169 	}
2170 
2171 	// no memory available
2172 	if (thread_get_current_thread_id() == sHeapGrowThread)
2173 		panic("heap: all heaps have run out of memory while growing\n");
2174 	else
2175 		dprintf("heap: all heaps have run out of memory\n");
2176 
2177 	return NULL;
2178 }
2179 
2180 
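// malloc_nogrow() forwards to memalign_nogrow() with no particular
// alignment; malloc() below does the same for memalign().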
2181 void *
2182 malloc_nogrow(size_t size)
2183 {
2184 	return memalign_nogrow(0, size);
2185 }
2186 
2187 
2188 void *
2189 malloc(size_t size)
2190 {
2191 	return memalign(0, size);
2192 }
2193 
2194 
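// Frees an allocation made by any of the kernel heaps, the dedicated grow
// heap, or (for huge allocations) a dedicated area; panics if the address
// does not belong to any of them.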
2195 void
2196 free(void *address)
2197 {
2198 	if (!gKernelStartup && !are_interrupts_enabled()) {
2199 		panic("free(): called with interrupts disabled\n");
2200 		return;
2201 	}
2202 
2203 	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2204 	for (uint32 i = 0; i < sHeapCount; i++) {
2205 		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2206 		if (heap_free(heap, address) == B_OK) {
2207 #if PARANOID_HEAP_VALIDATION
2208 			heap_validate_heap(heap);
2209 #endif
2210 			return;
2211 		}
2212 	}
2213 
2214 	// maybe it was allocated from the dedicated grow heap
2215 	if (heap_free(sGrowHeap, address) == B_OK)
2216 		return;
2217 
2218 	// or maybe it was a huge allocation using an area
2219 	area_info areaInfo;
2220 	area_id area = area_for(address);
2221 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2222 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2223 
2224 		// just make extra sure it was allocated by us
2225 		if (info->magic == kAreaAllocationMagic && info->area == area
2226 			&& info->size == areaInfo.size && info->base == areaInfo.address
2227 			&& info->allocation_size < areaInfo.size) {
2228 			delete_area(area);
2229 			TRACE(("free(): freed huge allocation by deleting area %ld\n",
2230 				area));
2231 			return;
2232 		}
2233 	}
2234 
2235 	panic("free(): free failed for address %p\n", address);
2236 }
2237 
2238 
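// Resizes an allocation by asking each heap (and then the dedicated grow
// heap) whether it owns the address; area-backed huge allocations are
// resized in place when possible and moved to a new allocation otherwise.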
2239 void *
2240 realloc(void *address, size_t newSize)
2241 {
2242 	if (!gKernelStartup && !are_interrupts_enabled()) {
2243 		panic("realloc(): called with interrupts disabled\n");
2244 		return NULL;
2245 	}
2246 
2247 	if (address == NULL)
2248 		return memalign(0, newSize);
2249 
2250 	if (newSize == 0) {
2251 		free(address);
2252 		return NULL;
2253 	}
2254 
2255 	void *newAddress = NULL;
2256 	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2257 	for (uint32 i = 0; i < sHeapCount; i++) {
2258 		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2259 		if (heap_realloc(heap, address, &newAddress, newSize) == B_OK) {
2260 #if PARANOID_HEAP_VALIDATION
2261 			heap_validate_heap(heap);
2262 #endif
2263 			return newAddress;
2264 		}
2265 	}
2266 
2267 	// maybe it was allocated from the dedicated grow heap
2268 	if (heap_realloc(sGrowHeap, address, &newAddress, newSize) == B_OK)
2269 		return newAddress;
2270 
2271 	// or maybe it was a huge allocation using an area
2272 	area_info areaInfo;
2273 	area_id area = area_for(address);
2274 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2275 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2276 
2277 		// just make extra sure it was allocated by us
2278 		if (info->magic == kAreaAllocationMagic && info->area == area
2279 			&& info->size == areaInfo.size && info->base == areaInfo.address
2280 			&& info->allocation_size < areaInfo.size) {
2281 			size_t available = info->size - ((addr_t)info->allocation_base
2282 				- (addr_t)info->base);
2283 
2284 			if (available >= newSize) {
2285 				// there is enough room available for the newSize
2286 				TRACE(("realloc(): new size %ld fits in old area %ld with %ld "
2287 					"available\n", newSize, area, available));
2288 				info->allocation_size = newSize;
2289 				return address;
2290 			}
2291 
2292 			// have to allocate/copy/free - TODO maybe resize the area instead?
2293 			newAddress = memalign(0, newSize);
2294 			if (newAddress == NULL) {
2295 				dprintf("realloc(): failed to allocate new block of %ld bytes\n",
2296 					newSize);
2297 				return NULL;
2298 			}
2299 
2300 			memcpy(newAddress, address, min_c(newSize, info->allocation_size));
2301 			delete_area(area);
2302 			TRACE(("realloc(): allocated new block %p for size %ld and deleted "
2303 				"old area %ld\n", newAddress, newSize, area));
2304 			return newAddress;
2305 		}
2306 	}
2307 
2308 	panic("realloc(): failed to realloc address %p to size %lu\n", address,
2309 		newSize);
2310 	return NULL;
2311 }
2312 
2313 
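// Allocates and zero-initializes numElements elements of the given size.
// Note that numElements * size is not checked for overflow, so callers must
// make sure the product fits into a size_t.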
2314 void *
2315 calloc(size_t numElements, size_t size)
2316 {
2317 	void *address = memalign(0, numElements * size);
2318 	if (address != NULL)
2319 		memset(address, 0, numElements * size);
2320 
2321 	return address;
2322 }
2323 
2324 
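// Queues a block to be free()d later by the deferred deleter daemon,
// intended for contexts that cannot call free() directly; the block itself
// is reused to hold the list link until the daemon releases it.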
2325 void
2326 deferred_free(void *block)
2327 {
2328 	if (block == NULL)
2329 		return;
2330 
2331 	DeferredFreeListEntry *entry = new(block) DeferredFreeListEntry;
2332 
2333 	InterruptsSpinLocker _(sDeferredFreeListLock);
2334 	sDeferredFreeList.Add(entry);
2335 }
2336 
2337 
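// Reference counted allocations: malloc_referenced() allocates a block with
// a hidden int32 reference count (initialized to 1) stored right in front of
// the returned pointer. malloc_referenced_acquire() adds a reference and
// malloc_referenced_release() drops one, freeing the block once the last
// reference is gone (atomic_add() returns the previous value). A typical use
// might look like this:
//
//	void *data = malloc_referenced(size);	// reference count is 1
//	malloc_referenced_acquire(data);		// hand out a second reference
//	...
//	malloc_referenced_release(data);		// drop one reference
//	malloc_referenced_release(data);		// last release frees the block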
2338 void *
2339 malloc_referenced(size_t size)
2340 {
2341 	int32 *referencedData = (int32 *)malloc(size + 4);
2342 	if (referencedData == NULL)
2343 		return NULL;
2344 
2345 	*referencedData = 1;
2346 	return referencedData + 1;
2347 }
2348 
2349 
2350 void *
2351 malloc_referenced_acquire(void *data)
2352 {
2353 	if (data != NULL) {
2354 		int32 *referencedData = (int32 *)data - 1;
2355 		atomic_add(referencedData, 1);
2356 	}
2357 
2358 	return data;
2359 }
2360 
2361 
2362 void
2363 malloc_referenced_release(void *data)
2364 {
2365 	if (data == NULL)
2366 		return;
2367 
2368 	int32 *referencedData = (int32 *)data - 1;
2369 	if (atomic_add(referencedData, -1) == 1)
2370 		free(referencedData);
2371 }
2372 
2373 
2374 DeferredDeletable::~DeferredDeletable()
2375 {
2376 }
2377 
2378 
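// Queues a DeferredDeletable object to be deleted later by the deferred
// deleter daemon, for contexts that must not delete it directly.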
2379 void
2380 deferred_delete(DeferredDeletable *deletable)
2381 {
2382 	if (deletable == NULL)
2383 		return;
2384 
2385 	InterruptsSpinLocker _(sDeferredFreeListLock);
2386 	sDeferredDeletableList.Add(deletable);
2387 }
2388