xref: /haiku/src/system/kernel/heap.cpp (revision 239222b2369c39dc52df52b0a7cdd6cc0a91bc92)
1 /*
2  * Copyright 2008-2009, Michael Lotz, mmlr@mlotz.ch.
3  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 #include <arch/debug.h>
11 #include <debug.h>
12 #include <elf.h>
13 #include <heap.h>
14 #include <int.h>
15 #include <kernel.h>
16 #include <lock.h>
17 #include <malloc.h>
18 #include <signal.h>
19 #include <string.h>
20 #include <team.h>
21 #include <thread.h>
22 #include <tracing.h>
23 #include <util/AutoLock.h>
24 #include <vm.h>
25 #include <vm_page.h>
26 
27 
28 //#define TRACE_HEAP
29 #ifdef TRACE_HEAP
30 #	define TRACE(x) dprintf x
31 #else
32 #	define TRACE(x) ;
33 #endif
34 
35 
36 #if KERNEL_HEAP_LEAK_CHECK
37 typedef struct heap_leak_check_info_s {
38 	addr_t		caller;
39 	size_t		size;
40 	thread_id	thread;
41 	team_id		team;
42 } heap_leak_check_info;
43 
44 struct caller_info {
45 	addr_t		caller;
46 	uint32		count;
47 	uint32		size;
48 };
49 
50 static const int32 kCallerInfoTableSize = 1024;
51 static caller_info sCallerInfoTable[kCallerInfoTableSize];
52 static int32 sCallerInfoCount = 0;
53 #endif
54 
55 
56 typedef struct heap_page_s heap_page;
57 
58 
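// Describes one kernel area backing a heap: its usable memory range, the page
// table holding per-page metadata, the list of free pages and the links used
// by the heap's area lists.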
59 typedef struct heap_area_s {
60 	area_id			area;
61 
62 	addr_t			base;
63 	size_t			size;
64 
65 	uint32			page_count;
66 	uint32			free_page_count;
67 
68 	heap_page *		free_pages;
69 	heap_page *		page_table;
70 
71 	heap_area_s *	prev;
72 	heap_area_s *	next;
73 	heap_area_s *	all_next;
74 } heap_area;
75 
76 
77 #define MAX_BIN_COUNT	31	// depends on the size of the bin_index field
78 
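// Per-page metadata: the owning area and page index, the bin the page is
// assigned to (bin_index == bin_count marks pages of a large multi-page
// allocation), the number of free elements, the list links and the free list
// of elements that were freed on this page.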
79 typedef struct heap_page_s {
80 	heap_area *		area;
81 	uint16			index;
82 	uint16			bin_index : 5;
83 	uint16			free_count : 10;
84 	uint16			in_use : 1;
85 	heap_page_s *	next;
86 	heap_page_s *	prev;
87 	union {
88 		uint16			empty_index;
89 		uint16			allocation_id; // used for bin == bin_count allocations
90 	};
91 	addr_t *		free_list;
92 } heap_page;
93 
94 
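// One allocation size class of a heap: its lock, the element size, how many
// elements fit onto one page and the list of pages with free elements.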
95 typedef struct heap_bin_s {
96 	mutex		lock;
97 	uint32		element_size;
98 	uint16		max_free_count;
99 	heap_page *	page_list; // sorted so that the desired page is always first
100 } heap_bin;
101 
102 
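// State of one heap: its bins, the list of areas that still have free pages
// and the list of all areas, plus the locks and usage counters protecting and
// describing them.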
103 struct heap_allocator_s {
104 	rw_lock		area_lock;
105 	mutex		page_lock;
106 
107 	const char *name;
108 	uint32		bin_count;
109 	uint32		page_size;
110 
111 	uint32		total_pages;
112 	uint32		total_free_pages;
113 	uint32		empty_areas;
114 
115 #if KERNEL_HEAP_LEAK_CHECK
116 	addr_t		(*get_caller)();
117 #endif
118 
119 	heap_bin *	bins;
120 	heap_area *	areas; // sorted so that the desired area is always first
121 	heap_area *	all_areas; // all areas including full ones
122 };
123 
124 
125 static const uint32 kAreaAllocationMagic = 'AAMG';
126 typedef struct area_allocation_info_s {
127 	area_id		area;
128 	void *		base;
129 	uint32		magic;
130 	size_t		size;
131 	size_t		allocation_size;
132 	size_t		allocation_alignment;
133 	void *		allocation_base;
134 } area_allocation_info;
135 
136 
137 struct DeferredFreeListEntry : SinglyLinkedListLinkImpl<DeferredFreeListEntry> {
138 };
139 
140 
141 typedef SinglyLinkedList<DeferredFreeListEntry> DeferredFreeList;
142 typedef SinglyLinkedList<DeferredDeletable> DeferredDeletableList;
143 
144 
145 // Heap class configuration
146 #define HEAP_CLASS_COUNT 3
147 static heap_class sHeapClasses[HEAP_CLASS_COUNT] = {
148 	{
149 		"small",					/* name */
150 		50,							/* initial percentage */
151 		B_PAGE_SIZE / 8,			/* max allocation size */
152 		B_PAGE_SIZE,				/* page size */
153 		8,							/* min bin size */
154 		4,							/* bin alignment */
155 		8,							/* min count per page */
156 		16							/* max waste per page */
157 	},
158 	{
159 		"medium",					/* name */
160 		30,							/* initial percentage */
161 		B_PAGE_SIZE * 2,			/* max allocation size */
162 		B_PAGE_SIZE * 8,			/* page size */
163 		B_PAGE_SIZE / 8,			/* min bin size */
164 		32,							/* bin alignment */
165 		4,							/* min count per page */
166 		64							/* max waste per page */
167 	},
168 	{
169 		"large",					/* name */
170 		20,							/* initial percentage */
171 		HEAP_AREA_USE_THRESHOLD,	/* max allocation size */
172 		B_PAGE_SIZE * 16,			/* page size */
173 		B_PAGE_SIZE * 2,			/* min bin size */
174 		128,						/* bin alignment */
175 		1,							/* min count per page */
176 		256							/* max waste per page */
177 	}
178 };
179 
180 
181 static uint32 sHeapCount;
182 static heap_allocator *sHeaps[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
183 static uint32 *sLastGrowRequest[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
184 static uint32 *sLastHandledGrowRequest[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
185 
186 static heap_allocator *sGrowHeap = NULL;
187 static thread_id sHeapGrowThread = -1;
188 static sem_id sHeapGrowSem = -1;
189 static sem_id sHeapGrownNotify = -1;
190 static bool sAddGrowHeap = false;
191 static DeferredFreeList sDeferredFreeList;
192 static DeferredDeletableList sDeferredDeletableList;
193 static spinlock sDeferredFreeListLock;
194 
195 
196 
197 // #pragma mark - Tracing
198 
199 #if KERNEL_HEAP_TRACING
200 namespace KernelHeapTracing {
201 
202 class Allocate : public AbstractTraceEntry {
203 	public:
204 		Allocate(addr_t address, size_t size)
205 			:	fAddress(address),
206 				fSize(size)
207 		{
208 			Initialized();
209 		}
210 
211 		virtual void AddDump(TraceOutput &out)
212 		{
213 			out.Print("heap allocate: 0x%08lx (%lu bytes)", fAddress, fSize);
214 		}
215 
216 	private:
217 		addr_t	fAddress;
218 		size_t	fSize;
219 };
220 
221 
222 class Reallocate : public AbstractTraceEntry {
223 	public:
224 		Reallocate(addr_t oldAddress, addr_t newAddress, size_t newSize)
225 			:	fOldAddress(oldAddress),
226 				fNewAddress(newAddress),
227 				fNewSize(newSize)
228 		{
229 			Initialized();
230 		};
231 
232 		virtual void AddDump(TraceOutput &out)
233 		{
234 			out.Print("heap reallocate: 0x%08lx -> 0x%08lx (%lu bytes)",
235 				fOldAddress, fNewAddress, fNewSize);
236 		}
237 
238 	private:
239 		addr_t	fOldAddress;
240 		addr_t	fNewAddress;
241 		size_t	fNewSize;
242 };
243 
244 
245 class Free : public AbstractTraceEntry {
246 	public:
247 		Free(addr_t address)
248 			:	fAddress(address)
249 		{
250 			Initialized();
251 		};
252 
253 		virtual void AddDump(TraceOutput &out)
254 		{
255 			out.Print("heap free: 0x%08lx", fAddress);
256 		}
257 
258 	private:
259 		addr_t	fAddress;
260 };
261 
262 
263 } // namespace KernelHeapTracing
264 
265 #	define T(x)	if (!gKernelStartup) new(std::nothrow) KernelHeapTracing::x;
266 #else
267 #	define T(x)	;
268 #endif
269 
270 
271 // #pragma mark - Debug functions
272 
273 
274 #if KERNEL_HEAP_LEAK_CHECK
275 static addr_t
276 get_caller()
277 {
278 	// Find the first return address outside of the allocator code. Note that
279 	// this makes certain assumptions about how the code for the functions
280 	// ends up in the kernel object.
281 	addr_t returnAddresses[5];
282 	int32 depth = arch_debug_get_stack_trace(returnAddresses, 5, 0, 1, false);
283 	for (int32 i = 0; i < depth; i++) {
284 		if (returnAddresses[i] < (addr_t)&get_caller
285 			|| returnAddresses[i] > (addr_t)&malloc_referenced_release) {
286 			return returnAddresses[i];
287 		}
288 	}
289 
290 	return 0;
291 }
292 #endif
293 
294 
295 static void
296 dump_page(heap_page *page)
297 {
298 	uint32 count = 0;
299 	for (addr_t *temp = page->free_list; temp != NULL; temp = (addr_t *)*temp)
300 		count++;
301 
302 	kprintf("\t\tpage %p: bin_index: %u; free_count: %u; empty_index: %u; "
303 		"free_list %p (%lu entr%s)\n", page, page->bin_index, page->free_count,
304 		page->empty_index, page->free_list, count, count == 1 ? "y" : "ies");
305 }
306 
307 
308 static void
309 dump_bin(heap_bin *bin)
310 {
311 	kprintf("\telement_size: %lu; max_free_count: %u; page_list %p;\n",
312 		bin->element_size, bin->max_free_count, bin->page_list);
313 
314 	for (heap_page *temp = bin->page_list; temp != NULL; temp = temp->next)
315 		dump_page(temp);
316 }
317 
318 
319 static void
320 dump_bin_list(heap_allocator *heap)
321 {
322 	for (uint32 i = 0; i < heap->bin_count; i++)
323 		dump_bin(&heap->bins[i]);
324 	kprintf("\n");
325 }
326 
327 
328 static void
329 dump_allocator_areas(heap_allocator *heap)
330 {
331 	heap_area *area = heap->all_areas;
332 	while (area) {
333 		kprintf("\tarea %p: area: %ld; base: 0x%08lx; size: %lu; page_count: "
334 			"%lu; free_pages: %p (%lu entr%s)\n", area, area->area, area->base,
335 			area->size, area->page_count, area->free_pages,
336 			area->free_page_count, area->free_page_count == 1 ? "y" : "ies");
337 		area = area->all_next;
338 	}
339 
340 	kprintf("\n");
341 }
342 
343 
344 static void
345 dump_allocator(heap_allocator *heap, bool areas, bool bins)
346 {
347 	kprintf("allocator %p: name: %s; page_size: %lu; bin_count: %lu; pages: "
348 		"%lu; free_pages: %lu; empty_areas: %lu\n", heap, heap->name,
349 		heap->page_size, heap->bin_count, heap->total_pages,
350 		heap->total_free_pages, heap->empty_areas);
351 
352 	if (areas)
353 		dump_allocator_areas(heap);
354 	if (bins)
355 		dump_bin_list(heap);
356 }
357 
358 
359 static int
360 dump_heap_list(int argc, char **argv)
361 {
362 	if (argc == 2 && strcmp(argv[1], "grow") == 0) {
363 		// only dump dedicated grow heap info
364 		kprintf("dedicated grow heap:\n");
365 		dump_allocator(sGrowHeap, true, true);
366 		return 0;
367 	}
368 
369 	bool stats = false;
370 	int i = 1;
371 
372 	if (argc > 1 && strcmp(argv[1], "stats") == 0) {
373 		stats = true;
374 		i++;
375 	}
376 
377 	uint64 heapAddress = 0;
378 	if (i < argc && !evaluate_debug_expression(argv[i], &heapAddress, true)) {
379 		print_debugger_command_usage(argv[0]);
380 		return 0;
381 	}
382 
383 	if (heapAddress == 0) {
384 		// dump default kernel heaps
385 		for (uint32 i = 0; i < sHeapCount; i++)
386 			dump_allocator(sHeaps[i], !stats, !stats);
387 	} else {
388 		// dump specified heap
389 		dump_allocator((heap_allocator*)(addr_t)heapAddress, !stats, !stats);
390 	}
391 
392 	return 0;
393 }
394 
395 
396 #if !KERNEL_HEAP_LEAK_CHECK
397 
398 static int
399 dump_allocations(int argc, char **argv)
400 {
401 	uint64 heapAddress = 0;
402 	bool statsOnly = false;
403 	for (int32 i = 1; i < argc; i++) {
404 		if (strcmp(argv[i], "stats") == 0)
405 			statsOnly = true;
406 		else if (!evaluate_debug_expression(argv[i], &heapAddress, true)) {
407 			print_debugger_command_usage(argv[0]);
408 			return 0;
409 		}
410 	}
411 
412 	size_t totalSize = 0;
413 	uint32 totalCount = 0;
414 	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
415 		heap_allocator *heap = sHeaps[heapIndex];
416 		if (heapAddress != 0)
417 			heap = (heap_allocator *)(addr_t)heapAddress;
418 
419 		// go through all the pages in all the areas
420 		heap_area *area = heap->all_areas;
421 		while (area) {
422 			for (uint32 i = 0; i < area->page_count; i++) {
423 				heap_page *page = &area->page_table[i];
424 				if (!page->in_use)
425 					continue;
426 
427 				addr_t base = area->base + i * heap->page_size;
428 				if (page->bin_index < heap->bin_count) {
429 					// page is used by a small allocation bin
430 					uint32 elementCount = page->empty_index;
431 					size_t elementSize
432 						= heap->bins[page->bin_index].element_size;
433 					for (uint32 j = 0; j < elementCount;
434 							j++, base += elementSize) {
435 						// walk the free list to see if this element is in use
436 						bool elementInUse = true;
437 						for (addr_t *temp = page->free_list; temp != NULL;
438 								temp = (addr_t *)*temp) {
439 							if ((addr_t)temp == base) {
440 								elementInUse = false;
441 								break;
442 							}
443 						}
444 
445 						if (!elementInUse)
446 							continue;
447 
448 						if (!statsOnly) {
449 							kprintf("address: 0x%08lx; size: %lu bytes\n",
450 								base, elementSize);
451 						}
452 
453 						totalSize += elementSize;
454 						totalCount++;
455 					}
456 				} else {
457 					// page is used by a big allocation, find the page count
458 					uint32 pageCount = 1;
459 					while (i + pageCount < area->page_count
460 						&& area->page_table[i + pageCount].in_use
461 						&& area->page_table[i + pageCount].bin_index
462 							== heap->bin_count
463 						&& area->page_table[i + pageCount].allocation_id
464 							== page->allocation_id)
465 						pageCount++;
466 
467 					size_t size = pageCount * heap->page_size;
468 
469 					if (!statsOnly) {
470 						kprintf("address: 0x%08lx; size: %lu bytes\n", base,
471 							size);
472 					}
473 
474 					totalSize += size;
475 					totalCount++;
476 
477 					// skip the allocated pages
478 					i += pageCount - 1;
479 				}
480 			}
481 
482 			area = area->all_next;
483 		}
484 
485 		if (heapAddress != 0)
486 			break;
487 	}
488 
489 	kprintf("total allocations: %lu; total bytes: %lu\n", totalCount, totalSize);
490 	return 0;
491 }
492 
493 #else // !KERNEL_HEAP_LEAK_CHECK
494 
495 static int
496 dump_allocations(int argc, char **argv)
497 {
498 	team_id team = -1;
499 	thread_id thread = -1;
500 	addr_t caller = 0;
501 	bool statsOnly = false;
502 	for (int32 i = 1; i < argc; i++) {
503 		if (strcmp(argv[i], "team") == 0)
504 			team = strtoul(argv[++i], NULL, 0);
505 		else if (strcmp(argv[i], "thread") == 0)
506 			thread = strtoul(argv[++i], NULL, 0);
507 		else if (strcmp(argv[i], "caller") == 0)
508 			caller = strtoul(argv[++i], NULL, 0);
509 		else if (strcmp(argv[i], "stats") == 0)
510 			statsOnly = true;
511 		else {
512 			print_debugger_command_usage(argv[0]);
513 			return 0;
514 		}
515 	}
516 
517 	size_t totalSize = 0;
518 	uint32 totalCount = 0;
519 	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
520 		heap_allocator *heap = sHeaps[heapIndex];
521 
522 		// go through all the pages in all the areas
523 		heap_area *area = heap->all_areas;
524 		while (area) {
525 			heap_leak_check_info *info = NULL;
526 			for (uint32 i = 0; i < area->page_count; i++) {
527 				heap_page *page = &area->page_table[i];
528 				if (!page->in_use)
529 					continue;
530 
531 				addr_t base = area->base + i * heap->page_size;
532 				if (page->bin_index < heap->bin_count) {
533 					// page is used by a small allocation bin
534 					uint32 elementCount = page->empty_index;
535 					size_t elementSize
536 						= heap->bins[page->bin_index].element_size;
537 					for (uint32 j = 0; j < elementCount;
538 							j++, base += elementSize) {
539 						// walk the free list to see if this element is in use
540 						bool elementInUse = true;
541 						for (addr_t *temp = page->free_list; temp != NULL;
542 								temp = (addr_t *)*temp) {
543 							if ((addr_t)temp == base) {
544 								elementInUse = false;
545 								break;
546 							}
547 						}
548 
549 						if (!elementInUse)
550 							continue;
551 
552 						info = (heap_leak_check_info *)(base + elementSize
553 							- sizeof(heap_leak_check_info));
554 
555 						if ((team == -1 || info->team == team)
556 							&& (thread == -1 || info->thread == thread)
557 							&& (caller == 0 || info->caller == caller)) {
558 							// interesting...
559 							if (!statsOnly) {
560 								kprintf("team: % 6ld; thread: % 6ld; "
561 									"address: 0x%08lx; size: %lu bytes; "
562 									"caller: %#lx\n", info->team, info->thread,
563 									base, info->size, info->caller);
564 							}
565 
566 							totalSize += info->size;
567 							totalCount++;
568 						}
569 					}
570 				} else {
571 					// page is used by a big allocation, find the page count
572 					uint32 pageCount = 1;
573 					while (i + pageCount < area->page_count
574 						&& area->page_table[i + pageCount].in_use
575 						&& area->page_table[i + pageCount].bin_index
576 							== heap->bin_count
577 						&& area->page_table[i + pageCount].allocation_id
578 							== page->allocation_id)
579 						pageCount++;
580 
581 					info = (heap_leak_check_info *)(base + pageCount
582 						* heap->page_size - sizeof(heap_leak_check_info));
583 
584 					if ((team == -1 || info->team == team)
585 						&& (thread == -1 || info->thread == thread)
586 						&& (caller == 0 || info->caller == caller)) {
587 						// interesting...
588 						if (!statsOnly) {
589 							kprintf("team: % 6ld; thread: % 6ld;"
590 								" address: 0x%08lx; size: %lu bytes;"
591 								" caller: %#lx\n", info->team, info->thread,
592 								base, info->size, info->caller);
593 						}
594 
595 						totalSize += info->size;
596 						totalCount++;
597 					}
598 
599 					// skip the allocated pages
600 					i += pageCount - 1;
601 				}
602 			}
603 
604 			area = area->all_next;
605 		}
606 	}
607 
608 	kprintf("total allocations: %lu; total bytes: %lu\n", totalCount, totalSize);
609 	return 0;
610 }
611 
612 
613 static caller_info*
614 get_caller_info(addr_t caller)
615 {
616 	// find the caller info
617 	for (int32 i = 0; i < sCallerInfoCount; i++) {
618 		if (caller == sCallerInfoTable[i].caller)
619 			return &sCallerInfoTable[i];
620 	}
621 
622 	// not found, add a new entry, if there are free slots
623 	if (sCallerInfoCount >= kCallerInfoTableSize)
624 		return NULL;
625 
626 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
627 	info->caller = caller;
628 	info->count = 0;
629 	info->size = 0;
630 
631 	return info;
632 }
633 
634 
635 static int
636 caller_info_compare_size(const void* _a, const void* _b)
637 {
638 	const caller_info* a = (const caller_info*)_a;
639 	const caller_info* b = (const caller_info*)_b;
640 	return (int)(b->size - a->size);
641 }
642 
643 
644 static int
645 caller_info_compare_count(const void* _a, const void* _b)
646 {
647 	const caller_info* a = (const caller_info*)_a;
648 	const caller_info* b = (const caller_info*)_b;
649 	return (int)(b->count - a->count);
650 }
651 
652 
653 static bool
654 analyze_allocation_callers(heap_allocator *heap)
655 {
656 	// go through all the pages in all the areas
657 	heap_area *area = heap->all_areas;
658 	while (area) {
659 		heap_leak_check_info *info = NULL;
660 		for (uint32 i = 0; i < area->page_count; i++) {
661 			heap_page *page = &area->page_table[i];
662 			if (!page->in_use)
663 				continue;
664 
665 			addr_t base = area->base + i * heap->page_size;
666 			if (page->bin_index < heap->bin_count) {
667 				// page is used by a small allocation bin
668 				uint32 elementCount = page->empty_index;
669 				size_t elementSize = heap->bins[page->bin_index].element_size;
670 				for (uint32 j = 0; j < elementCount; j++, base += elementSize) {
671 					// walk the free list to see if this element is in use
672 					bool elementInUse = true;
673 					for (addr_t *temp = page->free_list; temp != NULL;
674 						temp = (addr_t *)*temp) {
675 						if ((addr_t)temp == base) {
676 							elementInUse = false;
677 							break;
678 						}
679 					}
680 
681 					if (!elementInUse)
682 						continue;
683 
684 					info = (heap_leak_check_info *)(base + elementSize
685 						- sizeof(heap_leak_check_info));
686 
687 					caller_info *callerInfo = get_caller_info(info->caller);
688 					if (callerInfo == NULL) {
689 						kprintf("out of space for caller infos\n");
690 						return false;
691 					}
692 
693 					callerInfo->count++;
694 					callerInfo->size += info->size;
695 				}
696 			} else {
697 				// page is used by a big allocation, find the page count
698 				uint32 pageCount = 1;
699 				while (i + pageCount < area->page_count
700 					&& area->page_table[i + pageCount].in_use
701 					&& area->page_table[i + pageCount].bin_index
702 						== heap->bin_count
703 					&& area->page_table[i + pageCount].allocation_id
704 						== page->allocation_id) {
705 					pageCount++;
706 				}
707 
708 				info = (heap_leak_check_info *)(base + pageCount
709 					* heap->page_size - sizeof(heap_leak_check_info));
710 
711 				caller_info *callerInfo = get_caller_info(info->caller);
712 				if (callerInfo == NULL) {
713 					kprintf("out of space for caller infos\n");
714 					return false;
715 				}
716 
717 				callerInfo->count++;
718 				callerInfo->size += info->size;
719 
720 				// skip the allocated pages
721 				i += pageCount - 1;
722 			}
723 		}
724 
725 		area = area->all_next;
726 	}
727 
728 	return true;
729 }
730 
731 
732 static int
733 dump_allocations_per_caller(int argc, char **argv)
734 {
735 	bool sortBySize = true;
736 	heap_allocator *heap = NULL;
737 
738 	for (int32 i = 1; i < argc; i++) {
739 		if (strcmp(argv[i], "-c") == 0) {
740 			sortBySize = false;
741 		} else if (strcmp(argv[i], "-h") == 0) {
742 			uint64 heapAddress;
743 			if (++i >= argc
744 				|| !evaluate_debug_expression(argv[i], &heapAddress, true)) {
745 				print_debugger_command_usage(argv[0]);
746 				return 0;
747 			}
748 
749 			heap = (heap_allocator*)(addr_t)heapAddress;
750 		} else {
751 			print_debugger_command_usage(argv[0]);
752 			return 0;
753 		}
754 	}
755 
756 	sCallerInfoCount = 0;
757 
758 	if (heap != NULL) {
759 		if (!analyze_allocation_callers(heap))
760 			return 0;
761 	} else {
762 		for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
763 			if (!analyze_allocation_callers(sHeaps[heapIndex]))
764 				return 0;
765 		}
766 	}
767 
768 	// sort the array
769 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
770 		sortBySize ? &caller_info_compare_size : &caller_info_compare_count);
771 
772 	kprintf("%ld different callers, sorted by %s...\n\n", sCallerInfoCount,
773 		sortBySize ? "size" : "count");
774 
775 	kprintf("     count        size      caller\n");
776 	kprintf("----------------------------------\n");
777 	for (int32 i = 0; i < sCallerInfoCount; i++) {
778 		caller_info& info = sCallerInfoTable[i];
779 		kprintf("%10ld  %10ld  %#08lx", info.count, info.size, info.caller);
780 
781 		const char *symbol;
782 		const char *imageName;
783 		bool exactMatch;
784 		addr_t baseAddress;
785 
786 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
787 				&imageName, &exactMatch) == B_OK) {
788 			kprintf("  %s + 0x%lx (%s)%s\n", symbol,
789 				info.caller - baseAddress, imageName,
790 				exactMatch ? "" : " (nearest)");
791 		} else
792 			kprintf("\n");
793 	}
794 
795 	return 0;
796 }
797 
798 #endif // KERNEL_HEAP_LEAK_CHECK
799 
800 
801 #if PARANOID_HEAP_VALIDATION
802 static void
803 heap_validate_heap(heap_allocator *heap)
804 {
805 	ReadLocker areaReadLocker(heap->area_lock);
806 	for (uint32 i = 0; i < heap->bin_count; i++)
807 		mutex_lock(&heap->bins[i].lock);
808 	MutexLocker pageLocker(heap->page_lock);
809 
810 	uint32 totalPageCount = 0;
811 	uint32 totalFreePageCount = 0;
812 	heap_area *area = heap->all_areas;
813 	while (area != NULL) {
814 		// validate the free pages list
815 		uint32 freePageCount = 0;
816 		heap_page *lastPage = NULL;
817 		heap_page *page = area->free_pages;
818 		while (page) {
819 			if ((addr_t)page < (addr_t)&area->page_table[0]
820 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
821 				panic("free page is not part of the page table\n");
822 
823 			if (page->index >= area->page_count)
824 				panic("free page has invalid index\n");
825 
826 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
827 				panic("free page index does not lead to target page\n");
828 
829 			if (page->prev != lastPage)
830 				panic("free page entry has invalid prev link\n");
831 
832 			if (page->in_use)
833 				panic("free page marked as in use\n");
834 
835 			lastPage = page;
836 			page = page->next;
837 			freePageCount++;
838 		}
839 
840 		totalPageCount += freePageCount;
841 		totalFreePageCount += freePageCount;
842 		if (area->free_page_count != freePageCount)
843 			panic("free page count doesn't match free page list\n");
844 
845 		// validate the page table
846 		uint32 usedPageCount = 0;
847 		for (uint32 i = 0; i < area->page_count; i++) {
848 			if (area->page_table[i].in_use)
849 				usedPageCount++;
850 		}
851 
852 		totalPageCount += usedPageCount;
853 		if (freePageCount + usedPageCount != area->page_count) {
854 			panic("free pages and used pages do not add up (%lu + %lu != %lu)\n",
855 				freePageCount, usedPageCount, area->page_count);
856 		}
857 
858 		area = area->all_next;
859 	}
860 
861 	// validate the areas
862 	area = heap->areas;
863 	heap_area *lastArea = NULL;
864 	uint32 lastFreeCount = 0;
865 	while (area != NULL) {
866 		if (area->free_page_count < lastFreeCount)
867 			panic("size ordering of area list broken\n");
868 
869 		if (area->prev != lastArea)
870 			panic("area list entry has invalid prev link\n");
871 
872 		lastArea = area;
873 		lastFreeCount = area->free_page_count;
874 		area = area->next;
875 	}
876 
877 	lastArea = NULL;
878 	area = heap->all_areas;
879 	while (area != NULL) {
880 		if (lastArea != NULL && lastArea->base < area->base)
881 			panic("base ordering of all_areas list broken\n");
882 
883 		lastArea = area;
884 		area = area->all_next;
885 	}
886 
887 	// validate the bins
888 	for (uint32 i = 0; i < heap->bin_count; i++) {
889 		heap_bin *bin = &heap->bins[i];
890 		heap_page *lastPage = NULL;
891 		heap_page *page = bin->page_list;
892 		lastFreeCount = 0;
893 		while (page) {
894 			area = heap->all_areas;
895 			while (area) {
896 				if (area == page->area)
897 					break;
898 				area = area->all_next;
899 			}
900 
901 			if (area == NULL) {
902 				panic("page area not present in area list\n");
903 				page = page->next;
904 				continue;
905 			}
906 
907 			if ((addr_t)page < (addr_t)&area->page_table[0]
908 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
909 				panic("used page is not part of the page table\n");
910 
911 			if (page->index >= area->page_count)
912 				panic("used page has invalid index\n");
913 
914 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
915 				panic("used page index does not lead to target page\n");
916 
917 			if (page->prev != lastPage) {
918 				panic("used page entry has invalid prev link (%p vs %p bin "
919 					"%lu)\n", page->prev, lastPage, i);
920 			}
921 
922 			if (!page->in_use)
923 				panic("used page marked as not in use\n");
924 
925 			if (page->bin_index != i) {
926 				panic("used page with bin index %u in page list of bin %lu\n",
927 					page->bin_index, i);
928 			}
929 
930 			if (page->free_count < lastFreeCount)
931 				panic("ordering of bin page list broken\n");
932 
933 			// validate the free list
934 			uint32 freeSlotsCount = 0;
935 			addr_t *element = page->free_list;
936 			addr_t pageBase = area->base + page->index * heap->page_size;
937 			while (element) {
938 				if ((addr_t)element < pageBase
939 					|| (addr_t)element >= pageBase + heap->page_size)
940 					panic("free list entry out of page range\n");
941 
942 				if (((addr_t)element - pageBase) % bin->element_size != 0)
943 					panic("free list entry not on an element boundary\n");
944 
945 				element = (addr_t *)*element;
946 				freeSlotsCount++;
947 			}
948 
949 			uint32 slotCount = bin->max_free_count;
950 			if (page->empty_index > slotCount) {
951 				panic("empty index beyond slot count (%u with %lu slots)\n",
952 					page->empty_index, slotCount);
953 			}
954 
955 			freeSlotsCount += (slotCount - page->empty_index);
956 			if (freeSlotsCount > slotCount)
957 				panic("more free slots than fit into the page\n");
958 
959 			lastPage = page;
960 			lastFreeCount = page->free_count;
961 			page = page->next;
962 		}
963 	}
964 
965 	pageLocker.Unlock();
966 	for (uint32 i = 0; i < heap->bin_count; i++)
967 		mutex_unlock(&heap->bins[i].lock);
968 	areaReadLocker.Unlock();
969 }
970 #endif // PARANOID_HEAP_VALIDATION
971 
972 
973 // #pragma mark - Heap functions
974 
975 
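// Turns the given memory range into a new area of the heap: the heap_area
// header and the page table are placed at the start of the range, the
// remaining (page aligned) memory is split into free pages and the area is
// linked into the heap's area lists.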
976 void
977 heap_add_area(heap_allocator *heap, area_id areaID, addr_t base, size_t size)
978 {
979 	heap_area *area = (heap_area *)base;
980 	area->area = areaID;
981 
982 	base += sizeof(heap_area);
983 	size -= sizeof(heap_area);
984 
985 	uint32 pageCount = size / heap->page_size;
986 	size_t pageTableSize = pageCount * sizeof(heap_page);
987 	area->page_table = (heap_page *)base;
988 	base += pageTableSize;
989 	size -= pageTableSize;
990 
991 	// the rest is now actually usable memory (rounded to the next page)
992 	area->base = ROUNDUP(base, B_PAGE_SIZE);
993 	area->size = size & ~(B_PAGE_SIZE - 1);
994 
995 	// now we know the real page count
996 	pageCount = area->size / heap->page_size;
997 	area->page_count = pageCount;
998 
999 	// zero out the page table and fill in page indexes
1000 	memset((void *)area->page_table, 0, pageTableSize);
1001 	for (uint32 i = 0; i < pageCount; i++) {
1002 		area->page_table[i].area = area;
1003 		area->page_table[i].index = i;
1004 	}
1005 
1006 	// link all pages into the free pages list
1007 	for (uint32 i = 1; i < pageCount; i++) {
1008 		area->page_table[i - 1].next = &area->page_table[i];
1009 		area->page_table[i].prev = &area->page_table[i - 1];
1010 	}
1011 	area->free_pages = &area->page_table[0];
1012 	area->free_page_count = pageCount;
1013 	area->page_table[0].prev = NULL;
1014 	area->next = NULL;
1015 
1016 	WriteLocker areaWriteLocker(heap->area_lock);
1017 	MutexLocker pageLocker(heap->page_lock);
1018 	if (heap->areas == NULL) {
1019 		// it's the only (empty) area in that heap
1020 		area->prev = NULL;
1021 		heap->areas = area;
1022 	} else {
1023 		// link in this area as the last one as it is completely empty
1024 		heap_area *lastArea = heap->areas;
1025 		while (lastArea->next != NULL)
1026 			lastArea = lastArea->next;
1027 
1028 		lastArea->next = area;
1029 		area->prev = lastArea;
1030 	}
1031 
1032 	// insert this area in the all_areas list so it stays ordered by base
1033 	if (heap->all_areas == NULL || heap->all_areas->base < area->base) {
1034 		area->all_next = heap->all_areas;
1035 		heap->all_areas = area;
1036 	} else {
1037 		heap_area *insert = heap->all_areas;
1038 		while (insert->all_next && insert->all_next->base > area->base)
1039 			insert = insert->all_next;
1040 
1041 		area->all_next = insert->all_next;
1042 		insert->all_next = area;
1043 	}
1044 
1045 	heap->total_pages += area->page_count;
1046 	heap->total_free_pages += area->free_page_count;
1047 
1048 	if (areaID >= 0) {
1049 		// this area can be deleted later and is still empty - the empty count
1050 		// will be decremented as soon as this area is used for the first time
1051 		heap->empty_areas++;
1052 	}
1053 
1054 	pageLocker.Unlock();
1055 	areaWriteLocker.Unlock();
1056 
1057 	dprintf("heap_add_area: area %ld added to %s heap %p - usable range 0x%08lx "
1058 		"- 0x%08lx\n", area->area, heap->name, heap, area->base,
1059 		area->base + area->size);
1060 }
1061 
1062 
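// Unlinks a completely empty area from the heap's lists. The caller must hold
// the area write lock and is responsible for deleting the underlying area.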
1063 static status_t
1064 heap_remove_area(heap_allocator *heap, heap_area *area)
1065 {
1066 	if (area->free_page_count != area->page_count) {
1067 		panic("tried removing heap area that still has pages in use");
1068 		return B_ERROR;
1069 	}
1070 
1071 	if (area->prev == NULL && area->next == NULL) {
1072 		panic("tried removing the last non-full heap area");
1073 		return B_ERROR;
1074 	}
1075 
1076 	if (heap->areas == area)
1077 		heap->areas = area->next;
1078 	if (area->prev != NULL)
1079 		area->prev->next = area->next;
1080 	if (area->next != NULL)
1081 		area->next->prev = area->prev;
1082 
1083 	if (heap->all_areas == area)
1084 		heap->all_areas = area->all_next;
1085 	else {
1086 		heap_area *previous = heap->all_areas;
1087 		while (previous) {
1088 			if (previous->all_next == area) {
1089 				previous->all_next = area->all_next;
1090 				break;
1091 			}
1092 
1093 			previous = previous->all_next;
1094 		}
1095 
1096 		if (previous == NULL)
1097 			panic("removing heap area that is not in all list");
1098 	}
1099 
1100 	heap->total_pages -= area->page_count;
1101 	heap->total_free_pages -= area->free_page_count;
1102 
1103 	dprintf("heap_remove_area: area %ld with range 0x%08lx - 0x%08lx removed "
1104 		"from %s heap %p\n", area->area, area->base, area->base + area->size,
1105 		heap->name, heap);
1106 
1107 	return B_OK;
1108 }
1109 
1110 
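// Sets up a heap_allocator for the given heap class. Unless allocateOnHeap is
// set, the allocator and its bins are carved out of the start of the supplied
// memory range; the bin sizes are derived from the class' min_bin_size,
// bin_alignment and max_waste_per_page, and the remaining memory becomes the
// heap's first area.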
1111 heap_allocator *
1112 heap_create_allocator(const char *name, addr_t base, size_t size,
1113 	const heap_class *heapClass, bool allocateOnHeap)
1114 {
1115 	heap_allocator *heap;
1116 	if (allocateOnHeap) {
1117 		// allocate separately on the heap
1118 		heap = (heap_allocator *)malloc(sizeof(heap_allocator)
1119 			+ sizeof(heap_bin) * MAX_BIN_COUNT);
1120 	} else {
1121 		// use up the first part of the area
1122 		heap = (heap_allocator *)base;
1123 		base += sizeof(heap_allocator);
1124 		size -= sizeof(heap_allocator);
1125 	}
1126 
1127 	heap->name = name;
1128 	heap->page_size = heapClass->page_size;
1129 	heap->total_pages = heap->total_free_pages = heap->empty_areas = 0;
1130 	heap->areas = heap->all_areas = NULL;
1131 	heap->bins = (heap_bin *)((addr_t)heap + sizeof(heap_allocator));
1132 
1133 #if KERNEL_HEAP_LEAK_CHECK
1134 	heap->get_caller = &get_caller;
1135 #endif
1136 
1137 	heap->bin_count = 0;
1138 	size_t binSize = 0, lastSize = 0;
1139 	uint32 count = heap->page_size / heapClass->min_bin_size;
1140 	for (; count >= heapClass->min_count_per_page; count--, lastSize = binSize) {
1141 		if (heap->bin_count >= MAX_BIN_COUNT)
1142 			panic("heap configuration invalid - max bin count reached\n");
1143 
1144 		binSize = (heap->page_size / count) & ~(heapClass->bin_alignment - 1);
1145 		if (binSize == lastSize)
1146 			continue;
1147 		if (heap->page_size - count * binSize > heapClass->max_waste_per_page)
1148 			continue;
1149 
1150 		heap_bin *bin = &heap->bins[heap->bin_count];
1151 		mutex_init(&bin->lock, "heap bin lock");
1152 		bin->element_size = binSize;
1153 		bin->max_free_count = heap->page_size / binSize;
1154 		bin->page_list = NULL;
1155 		heap->bin_count++;
1156 	}
1157 
1158 	if (!allocateOnHeap) {
1159 		base += heap->bin_count * sizeof(heap_bin);
1160 		size -= heap->bin_count * sizeof(heap_bin);
1161 	}
1162 
1163 	rw_lock_init(&heap->area_lock, "heap area rw lock");
1164 	mutex_init(&heap->page_lock, "heap page lock");
1165 
1166 	heap_add_area(heap, -1, base, size);
1167 	return heap;
1168 }
1169 
1170 
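// Bookkeeping after pages were returned to an area: updates the free page
// counters, (re)inserts the area into the heap's area list keeping it ordered
// by free page count, and tracks areas that have become completely empty.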
1171 static inline void
1172 heap_free_pages_added(heap_allocator *heap, heap_area *area, uint32 pageCount)
1173 {
1174 	area->free_page_count += pageCount;
1175 	heap->total_free_pages += pageCount;
1176 
1177 	if (area->free_page_count == pageCount) {
1178 		// we need to add ourselves to the area list of the heap
1179 		area->prev = NULL;
1180 		area->next = heap->areas;
1181 		if (area->next)
1182 			area->next->prev = area;
1183 		heap->areas = area;
1184 	} else {
1185 		// we might need to move back in the area list
1186 		if (area->next && area->next->free_page_count < area->free_page_count) {
1187 			// move ourselves so the list stays ordered
1188 			heap_area *insert = area->next;
1189 			while (insert->next
1190 				&& insert->next->free_page_count < area->free_page_count)
1191 				insert = insert->next;
1192 
1193 			if (area->prev)
1194 				area->prev->next = area->next;
1195 			if (area->next)
1196 				area->next->prev = area->prev;
1197 			if (heap->areas == area)
1198 				heap->areas = area->next;
1199 
1200 			area->prev = insert;
1201 			area->next = insert->next;
1202 			if (area->next)
1203 				area->next->prev = area;
1204 			insert->next = area;
1205 		}
1206 	}
1207 
1208 	if (area->free_page_count == area->page_count && area->area >= 0)
1209 		heap->empty_areas++;
1210 }
1211 
1212 
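// Counterpart to heap_free_pages_added(): updates the counters after pages
// were taken from an area, removes the area from the heap's area list once it
// is full and otherwise keeps that list ordered by free page count.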
1213 static inline void
1214 heap_free_pages_removed(heap_allocator *heap, heap_area *area, uint32 pageCount)
1215 {
1216 	if (area->free_page_count == area->page_count && area->area >= 0) {
1217 		// this area was completely empty
1218 		heap->empty_areas--;
1219 	}
1220 
1221 	area->free_page_count -= pageCount;
1222 	heap->total_free_pages -= pageCount;
1223 
1224 	if (area->free_page_count == 0) {
1225 		// the area is now full so we remove it from the area list
1226 		if (area->prev)
1227 			area->prev->next = area->next;
1228 		if (area->next)
1229 			area->next->prev = area->prev;
1230 		if (heap->areas == area)
1231 			heap->areas = area->next;
1232 		area->next = area->prev = NULL;
1233 	} else {
1234 		// we might need to move forward in the area list
1235 		if (area->prev && area->prev->free_page_count > area->free_page_count) {
1236 			// move ourselves so the list stays ordered
1237 			heap_area *insert = area->prev;
1238 			while (insert->prev
1239 				&& insert->prev->free_page_count > area->free_page_count)
1240 				insert = insert->prev;
1241 
1242 			if (area->prev)
1243 				area->prev->next = area->next;
1244 			if (area->next)
1245 				area->next->prev = area->prev;
1246 
1247 			area->prev = insert->prev;
1248 			area->next = insert;
1249 			if (area->prev)
1250 				area->prev->next = area;
1251 			if (heap->areas == insert)
1252 				heap->areas = area;
1253 			insert->prev = area;
1254 		}
1255 	}
1256 }
1257 
1258 
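// Pushes a page onto the head of the given doubly linked page list.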
1259 static inline void
1260 heap_link_page(heap_page *page, heap_page **list)
1261 {
1262 	page->prev = NULL;
1263 	page->next = *list;
1264 	if (page->next)
1265 		page->next->prev = page;
1266 	*list = page;
1267 }
1268 
1269 
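// Removes a page from a doubly linked page list, updating the list head if
// necessary.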
1270 static inline void
1271 heap_unlink_page(heap_page *page, heap_page **list)
1272 {
1273 	if (page->prev)
1274 		page->prev->next = page->next;
1275 	if (page->next)
1276 		page->next->prev = page->prev;
1277 	if (list && *list == page) {
1278 		*list = page->next;
1279 		if (page->next)
1280 			page->next->prev = NULL;
1281 	}
1282 }
1283 
1284 
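// Searches the heap's areas for pageCount contiguous free pages, marks them as
// a single multi-page allocation (all tagged with the same allocation_id) and
// returns the first page, or NULL if no suitable run was found.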
1285 static heap_page *
1286 heap_allocate_contiguous_pages(heap_allocator *heap, uint32 pageCount)
1287 {
1288 	MutexLocker pageLocker(heap->page_lock);
1289 	heap_area *area = heap->areas;
1290 	while (area) {
1291 		bool found = false;
1292 		int32 first = -1;
1293 		for (uint32 i = 0; i < area->page_count; i++) {
1294 			if (area->page_table[i].in_use) {
1295 				first = -1;
1296 				continue;
1297 			}
1298 
1299 			if (first < 0)
1300 				first = i;
1301 
1302 			if (first >= 0) {
1303 				if ((i + 1 - first) == pageCount) {
1304 					found = true;
1305 					break;
1306 				}
1307 			}
1308 		}
1309 
1310 		if (!found) {
1311 			area = area->next;
1312 			continue;
1313 		}
1314 
1315 		for (uint32 i = first; i < first + pageCount; i++) {
1316 			heap_page *page = &area->page_table[i];
1317 			page->in_use = 1;
1318 			page->bin_index = heap->bin_count;
1319 
1320 			heap_unlink_page(page, &area->free_pages);
1321 
1322 			page->next = page->prev = NULL;
1323 			page->free_list = NULL;
1324 			page->allocation_id = (uint16)first;
1325 		}
1326 
1327 		heap_free_pages_removed(heap, area, pageCount);
1328 		return &area->page_table[first];
1329 	}
1330 
1331 	return NULL;
1332 }
1333 
1334 
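// Central allocation path: requests that fit into a bin are served from a page
// of that bin (taking a fresh page off an area's free list if needed), larger
// requests are served by allocating whole contiguous pages.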
1335 static void *
1336 heap_raw_alloc(heap_allocator *heap, size_t size, uint32 binIndex)
1337 {
1338 	TRACE(("raw_alloc: heap %p; size %lu; bin %lu\n", heap, size, binIndex));
1339 	if (binIndex < heap->bin_count) {
1340 		heap_bin *bin = &heap->bins[binIndex];
1341 		MutexLocker binLocker(bin->lock);
1342 
1343 		heap_page *page = bin->page_list;
1344 		if (page == NULL) {
1345 			MutexLocker pageLocker(heap->page_lock);
1346 			heap_area *area = heap->areas;
1347 			if (area == NULL) {
1348 				TRACE(("heap %p: no free pages to allocate %lu bytes\n", heap,
1349 					size));
1350 				return NULL;
1351 			}
1352 
1353 			// by design there are only areas in the list that still have
1354 			// free pages available
1355 			page = area->free_pages;
1356 			area->free_pages = page->next;
1357 			if (page->next)
1358 				page->next->prev = NULL;
1359 
1360 			heap_free_pages_removed(heap, area, 1);
1361 
1362 			if (page->in_use)
1363 				panic("got an in use page from the free pages list\n");
1364 			page->in_use = 1;
1365 
1366 			pageLocker.Unlock();
1367 
1368 			page->bin_index = binIndex;
1369 			page->free_count = bin->max_free_count;
1370 			page->empty_index = 0;
1371 			page->free_list = NULL;
1372 			page->next = page->prev = NULL;
1373 			bin->page_list = page;
1374 		}
1375 
1376 		// we now have a page with at least one free slot
1377 		void *address = NULL;
1378 		if (page->free_list) {
1379 			// there's a previously freed entry we can use
1380 			address = page->free_list;
1381 			page->free_list = (addr_t *)*page->free_list;
1382 		} else {
1383 			// the page hasn't been fully allocated so use the next empty_index
1384 			address = (void *)(page->area->base + page->index * heap->page_size
1385 				+ page->empty_index * bin->element_size);
1386 			page->empty_index++;
1387 		}
1388 
1389 		page->free_count--;
1390 		if (page->free_count == 0) {
1391 			// the page is now full so we remove it from the page_list
1392 			bin->page_list = page->next;
1393 			if (page->next)
1394 				page->next->prev = NULL;
1395 			page->next = page->prev = NULL;
1396 		}
1397 
1398 		binLocker.Unlock();
1399 
1400 #if KERNEL_HEAP_LEAK_CHECK
1401 		heap_leak_check_info *info = (heap_leak_check_info *)((addr_t)address
1402 			+ bin->element_size - sizeof(heap_leak_check_info));
1403 		info->size = size - sizeof(heap_leak_check_info);
1404 		info->thread = (gKernelStartup ? 0 : thread_get_current_thread_id());
1405 		info->team = (gKernelStartup ? 0 : team_get_current_team_id());
1406 		info->caller = heap->get_caller();
1407 #endif
1408 		return address;
1409 	}
1410 
1411 	uint32 pageCount = (size + heap->page_size - 1) / heap->page_size;
1412 	heap_page *firstPage = heap_allocate_contiguous_pages(heap, pageCount);
1413 	if (firstPage == NULL) {
1414 		TRACE(("heap %p: found no contiguous pages to allocate %ld bytes\n",
1415 			heap, size));
1416 		return NULL;
1417 	}
1418 
1419 #if KERNEL_HEAP_LEAK_CHECK
1420 	heap_leak_check_info *info = (heap_leak_check_info *)(firstPage->area->base
1421 		+ (firstPage->index + pageCount) * heap->page_size
1422 		- sizeof(heap_leak_check_info));
1423 	info->size = size - sizeof(heap_leak_check_info);
1424 	info->thread = (gKernelStartup ? 0 : thread_get_current_thread_id());
1425 	info->team = (gKernelStartup ? 0 : team_get_current_team_id());
1426 	info->caller = heap->get_caller();
1427 #endif
1428 	return (void *)(firstPage->area->base + firstPage->index * heap->page_size);
1429 }
1430 
1431 
1432 #if DEBUG
1433 static bool
1434 is_valid_alignment(size_t number)
1435 {
1436 	// this cryptic line accepts zero and all powers of two
1437 	return ((~number + 1) | ((number << 1) - 1)) == ~0UL;
1438 }
1439 #endif
1440 
1441 
1442 inline bool
1443 heap_should_grow(heap_allocator *heap)
1444 {
1445 	// suggest growing if there is less than 20% of a grow size available
1446 	return heap->total_free_pages * heap->page_size < HEAP_GROW_SIZE / 5;
1447 }
1448 
1449 
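// Allocates size bytes from the given heap. A non-zero alignment is currently
// handled by rounding the size up to the alignment before selecting a bin
// (see the TODO below).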
1450 void *
1451 heap_memalign(heap_allocator *heap, size_t alignment, size_t size)
1452 {
1453 	TRACE(("memalign(alignment = %lu, size = %lu)\n", alignment, size));
1454 
1455 #if DEBUG
1456 	if (!is_valid_alignment(alignment))
1457 		panic("memalign() with an alignment which is not a power of 2\n");
1458 #endif
1459 
1460 #if KERNEL_HEAP_LEAK_CHECK
1461 	size += sizeof(heap_leak_check_info);
1462 #endif
1463 
1464 	if (alignment != 0) {
1465 		// TODO: The alignment is done by ensuring that the element size
1466 		// of the target bin is aligned with the requested alignment. This
1467 		// has the problem that it wastes space because a better (smaller) bin
1468 		// could possibly be selected. We should pick the best bin and check
1469 		// if there is an aligned block in the free list or if a new (page
1470 		// aligned) page has to be allocated anyway.
1471 		size = ROUNDUP(size, alignment);
1472 	}
1473 
1474 	uint32 binIndex;
1475 	for (binIndex = 0; binIndex < heap->bin_count; binIndex++) {
1476 		if (size <= heap->bins[binIndex].element_size)
1477 			break;
1478 	}
1479 
1480 	void *address = heap_raw_alloc(heap, size, binIndex);
1481 
1482 #if KERNEL_HEAP_LEAK_CHECK
1483 	size -= sizeof(heap_leak_check_info);
1484 #endif
1485 
1486 	TRACE(("memalign(): asked to allocate %lu bytes, returning pointer %p\n",
1487 		size, address));
1488 
1489 	T(Allocate((addr_t)address, size));
1490 	if (address == NULL)
1491 		return address;
1492 
1493 #if PARANOID_KERNEL_MALLOC
1494 	memset(address, 0xcc, size);
1495 #endif
1496 
1497 #if PARANOID_KERNEL_FREE
1498 	// make sure 0xdeadbeef is cleared if we do not overwrite the memory
1499 	// and the user does not clear it
1500 	if (((uint32 *)address)[1] == 0xdeadbeef)
1501 		((uint32 *)address)[1] = 0xcccccccc;
1502 #endif
1503 
1504 	return address;
1505 }
1506 
1507 
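// Returns an allocation to the heap: small allocations go back onto their
// page's free list, large allocations return all of their pages to the area's
// free page list. Surplus completely empty areas are deleted afterwards.
// Returns B_ENTRY_NOT_FOUND if the address does not belong to this heap.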
1508 status_t
1509 heap_free(heap_allocator *heap, void *address)
1510 {
1511 	if (address == NULL)
1512 		return B_OK;
1513 
1514 	ReadLocker areaReadLocker(heap->area_lock);
1515 	heap_area *area = heap->all_areas;
1516 	while (area) {
1517 		// since the all_areas list is ordered by base with the biggest
1518 		// base at the top, we need only find the first area with a base
1519 		// smaller than our address to become our only candidate for freeing
1520 		if (area->base <= (addr_t)address) {
1521 			if ((addr_t)address >= area->base + area->size) {
1522 				// none of the other areas can contain the address as the list
1523 				// is ordered
1524 				return B_ENTRY_NOT_FOUND;
1525 			}
1526 
1527 			// this area contains the allocation, we're done searching
1528 			break;
1529 		}
1530 
1531 		area = area->all_next;
1532 	}
1533 
1534 	if (area == NULL) {
1535 		// this address does not belong to us
1536 		return B_ENTRY_NOT_FOUND;
1537 	}
1538 
1539 	TRACE(("free(): asked to free pointer %p\n", address));
1540 
1541 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1542 		/ heap->page_size];
1543 
1544 	TRACE(("free(): page %p: bin_index %d, free_count %d\n", page,
1545 		page->bin_index, page->free_count));
1546 
1547 	if (page->bin_index > heap->bin_count) {
1548 		panic("free(): page %p: invalid bin_index %d\n", page, page->bin_index);
1549 		return B_ERROR;
1550 	}
1551 
1552 	if (page->bin_index < heap->bin_count) {
1553 		// small allocation
1554 		heap_bin *bin = &heap->bins[page->bin_index];
1555 
1556 #if PARANOID_KERNEL_FREE
1557 		if (((uint32 *)address)[1] == 0xdeadbeef) {
1558 			// This block looks like it was freed already, walk the free list
1559 			// on this page to make sure this address doesn't exist.
1560 			MutexLocker binLocker(bin->lock);
1561 			for (addr_t *temp = page->free_list; temp != NULL;
1562 					temp = (addr_t *)*temp) {
1563 				if (temp == address) {
1564 					panic("free(): address %p already exists in page free "
1565 						"list\n", address);
1566 					return B_ERROR;
1567 				}
1568 			}
1569 		}
1570 
1571 		// the first 4 bytes are overwritten with the next free list pointer
1572 		// later
1573 		uint32 *dead = (uint32 *)address;
1574 		for (uint32 i = 1; i < bin->element_size / sizeof(uint32); i++)
1575 			dead[i] = 0xdeadbeef;
1576 #endif
1577 
1578 		MutexLocker binLocker(bin->lock);
1579 		if (((addr_t)address - area->base - page->index
1580 			* heap->page_size) % bin->element_size != 0) {
1581 			panic("free(): passed invalid pointer %p supposed to be in bin for "
1582 				"element size %ld\n", address, bin->element_size);
1583 			return B_ERROR;
1584 		}
1585 
1586 		// add the address to the page free list
1587 		*(addr_t *)address = (addr_t)page->free_list;
1588 		page->free_list = (addr_t *)address;
1589 		page->free_count++;
1590 
1591 		if (page->free_count == bin->max_free_count) {
1592 			// we are now empty, remove the page from the bin list
1593 			MutexLocker pageLocker(heap->page_lock);
1594 			heap_unlink_page(page, &bin->page_list);
1595 			page->in_use = 0;
1596 			heap_link_page(page, &area->free_pages);
1597 			heap_free_pages_added(heap, area, 1);
1598 		} else if (page->free_count == 1) {
1599 			// we need to add ourselves to the page list of the bin
1600 			heap_link_page(page, &bin->page_list);
1601 		} else {
1602 			// we might need to move back in the free pages list
1603 			if (page->next && page->next->free_count < page->free_count) {
1604 				// move ourselves so the list stays ordered
1605 				heap_page *insert = page->next;
1606 				while (insert->next
1607 					&& insert->next->free_count < page->free_count)
1608 					insert = insert->next;
1609 
1610 				heap_unlink_page(page, &bin->page_list);
1611 
1612 				page->prev = insert;
1613 				page->next = insert->next;
1614 				if (page->next)
1615 					page->next->prev = page;
1616 				insert->next = page;
1617 			}
1618 		}
1619 	} else {
1620 		// large allocation, just return the pages to the page free list
1621 		uint32 allocationID = page->allocation_id;
1622 		uint32 maxPages = area->page_count - page->index;
1623 		uint32 pageCount = 0;
1624 
1625 		MutexLocker pageLocker(heap->page_lock);
1626 		for (uint32 i = 0; i < maxPages; i++) {
1627 			// loop until we find the end of this allocation
1628 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1629 				|| page[i].allocation_id != allocationID)
1630 				break;
1631 
1632 			// this page still belongs to the same allocation
1633 			page[i].in_use = 0;
1634 			page[i].allocation_id = 0;
1635 
1636 			// return it to the free list
1637 			heap_link_page(&page[i], &area->free_pages);
1638 			pageCount++;
1639 		}
1640 
1641 		heap_free_pages_added(heap, area, pageCount);
1642 	}
1643 
1644 	T(Free((addr_t)address));
1645 	areaReadLocker.Unlock();
1646 
1647 	if (heap->empty_areas > 1) {
1648 		WriteLocker areaWriteLocker(heap->area_lock);
1649 		MutexLocker pageLocker(heap->page_lock);
1650 
1651 		area = heap->areas;
1652 		while (area != NULL && heap->empty_areas > 1) {
1653 			heap_area *next = area->next;
1654 			if (area->area >= 0
1655 				&& area->free_page_count == area->page_count
1656 				&& heap_remove_area(heap, area) == B_OK) {
1657 				delete_area(area->area);
1658 				heap->empty_areas--;
1659 			}
1660 
1661 			area = next;
1662 		}
1663 	}
1664 
1665 	return B_OK;
1666 }
1667 
1668 
1669 #if KERNEL_HEAP_LEAK_CHECK
1670 extern "C" void
1671 heap_set_get_caller(heap_allocator* heap, addr_t (*getCaller)())
1672 {
1673 	heap->get_caller = getCaller;
1674 }
1675 #endif
1676 
1677 
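// Resizes an allocation of this heap. If the new size still fits the existing
// bin element or page run, the allocation is reused in place; otherwise a new
// block is allocated, the data copied over and the old block freed. Returns
// B_ENTRY_NOT_FOUND if the address does not belong to this heap.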
1678 static status_t
1679 heap_realloc(heap_allocator *heap, void *address, void **newAddress,
1680 	size_t newSize)
1681 {
1682 	ReadLocker areaReadLocker(heap->area_lock);
1683 	heap_area *area = heap->all_areas;
1684 	while (area) {
1685 		// since the all_areas list is ordered by base with the biggest
1686 		// base at the top, we need only find the first area with a base
1687 		// smaller than our address to become our only candidate for
1688 		// reallocating
1689 		if (area->base <= (addr_t)address) {
1690 			if ((addr_t)address >= area->base + area->size) {
1691 				// none of the other areas can contain the address as the list
1692 				// is ordered
1693 				return B_ENTRY_NOT_FOUND;
1694 			}
1695 
1696 			// this area contains the allocation, we're done searching
1697 			break;
1698 		}
1699 
1700 		area = area->all_next;
1701 	}
1702 
1703 	if (area == NULL) {
1704 		// this address does not belong to us
1705 		return B_ENTRY_NOT_FOUND;
1706 	}
1707 
1708 	TRACE(("realloc(address = %p, newSize = %lu)\n", address, newSize));
1709 
1710 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1711 		/ heap->page_size];
1712 	if (page->bin_index > heap->bin_count) {
1713 		panic("realloc(): page %p: invalid bin_index %d\n", page,
1714 			page->bin_index);
1715 		return B_ERROR;
1716 	}
1717 
1718 	// find out the size of the old allocation first
1719 	size_t minSize = 0;
1720 	size_t maxSize = 0;
1721 	if (page->bin_index < heap->bin_count) {
1722 		// this was a small allocation
1723 		heap_bin *bin = &heap->bins[page->bin_index];
1724 		maxSize = bin->element_size;
1725 		if (page->bin_index > 0)
1726 			minSize = heap->bins[page->bin_index - 1].element_size + 1;
1727 	} else {
1728 		// this was a large allocation
1729 		uint32 allocationID = page->allocation_id;
1730 		uint32 maxPages = area->page_count - page->index;
1731 		maxSize = heap->page_size;
1732 
1733 		MutexLocker pageLocker(heap->page_lock);
1734 		for (uint32 i = 1; i < maxPages; i++) {
1735 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1736 				|| page[i].allocation_id != allocationID)
1737 				break;
1738 
1739 			minSize += heap->page_size;
1740 			maxSize += heap->page_size;
1741 		}
1742 	}
1743 
1744 	areaReadLocker.Unlock();
1745 
1746 #if KERNEL_HEAP_LEAK_CHECK
1747 	newSize += sizeof(heap_leak_check_info);
1748 #endif
1749 
1750 	// does the new allocation simply fit in the old allocation?
1751 	if (newSize > minSize && newSize <= maxSize) {
1752 #if KERNEL_HEAP_LEAK_CHECK
1753 		// update the size info (the info is at the end so stays where it is)
1754 		heap_leak_check_info *info = (heap_leak_check_info *)((addr_t)address
1755 			+ maxSize - sizeof(heap_leak_check_info));
1756 		info->size = newSize - sizeof(heap_leak_check_info);
1757 		newSize -= sizeof(heap_leak_check_info);
1758 #endif
1759 
1760 		T(Reallocate((addr_t)address, (addr_t)address, newSize));
1761 		*newAddress = address;
1762 		return B_OK;
1763 	}
1764 
1765 #if KERNEL_HEAP_LEAK_CHECK
1766 	// new leak check info will be created with the malloc below
1767 	newSize -= sizeof(heap_leak_check_info);
1768 #endif
1769 
1770 	// if not, allocate a new chunk of memory
1771 	*newAddress = memalign(0, newSize);
1772 	T(Reallocate((addr_t)address, (addr_t)*newAddress, newSize));
1773 	if (*newAddress == NULL) {
1774 		// we tried but the allocation failed; still, the operation is done
1775 		return B_OK;
1776 	}
1777 
1778 	// copy the old data and free the old allocation
1779 	memcpy(*newAddress, address, min_c(maxSize, newSize));
1780 	heap_free(heap, address);
1781 	return B_OK;
1782 }
1783 
1784 
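// Maps an allocation size and CPU number to the index of the heap that should
// serve the request: the smallest heap class that can hold the size, offset
// into the per-CPU block of heaps.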
1785 inline uint32
1786 heap_index_for(size_t size, int32 cpu)
1787 {
1788 #if KERNEL_HEAP_LEAK_CHECK
1789 	// take the extra info size into account
1790 	size += sizeof(heap_leak_check_info_s);
1791 #endif
1792 
1793 	uint32 index = 0;
1794 	for (; index < HEAP_CLASS_COUNT - 1; index++) {
1795 		if (size <= sHeapClasses[index].max_allocation_size)
1796 			break;
1797 	}
1798 
1799 	return index + cpu * HEAP_CLASS_COUNT;
1800 }
1801 
1802 
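// Drains the deferred free and deferred deletable lists: everything queued
// there is freed or deleted here, outside of the context that queued it.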
1803 static void
1804 deferred_deleter(void *arg, int iteration)
1805 {
1806 	// move entries and deletables to on-stack lists
1807 	InterruptsSpinLocker locker(sDeferredFreeListLock);
1808 	if (sDeferredFreeList.IsEmpty() && sDeferredDeletableList.IsEmpty())
1809 		return;
1810 
1811 	DeferredFreeList entries;
1812 	entries.MoveFrom(&sDeferredFreeList);
1813 
1814 	DeferredDeletableList deletables;
1815 	deletables.MoveFrom(&sDeferredDeletableList);
1816 
1817 	locker.Unlock();
1818 
1819 	// free the entries
1820 	while (DeferredFreeListEntry* entry = entries.RemoveHead())
1821 		free(entry);
1822 
1823 	// delete the deletables
1824 	while (DeferredDeletable* deletable = deletables.RemoveHead())
1825 		delete deletable;
1826 }
1827 
1828 
1829 //	#pragma mark -
1830 
1831 
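// Creates a new kernel area of the given size and adds it to the heap.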
1832 static status_t
1833 heap_create_new_heap_area(heap_allocator *heap, const char *name, size_t size)
1834 {
1835 	void *address = NULL;
1836 	area_id heapArea = create_area(name, &address,
1837 		B_ANY_KERNEL_BLOCK_ADDRESS, size, B_FULL_LOCK,
1838 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1839 	if (heapArea < B_OK) {
1840 		TRACE(("heap: couldn't allocate heap area \"%s\"\n", name));
1841 		return heapArea;
1842 	}
1843 
1844 	heap_add_area(heap, heapArea, (addr_t)address, size);
1845 #if PARANOID_HEAP_VALIDATION
1846 	heap_validate_heap(heap);
1847 #endif
1848 	return B_OK;
1849 }
1850 
1851 
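// Dedicated grower thread: waits on sHeapGrowSem and, when woken, adds a new
// area to the dedicated grow heap if requested and to every kernel heap that
// is nearly full or has an outstanding grow request, then notifies waiters.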
1852 static int32
1853 heap_grow_thread(void *)
1854 {
1855 	while (true) {
1856 		// wait for a request to grow the heap list
1857 		if (acquire_sem(sHeapGrowSem) < B_OK)
1858 			continue;
1859 
1860 		if (sAddGrowHeap) {
1861 			// the grow heap is going to run full soon, try to allocate a new
1862 			// one to make some room.
1863 			TRACE(("heap_grower: grow heaps will run out of memory soon\n"));
1864 			if (heap_create_new_heap_area(sGrowHeap, "additional grow heap",
1865 					HEAP_DEDICATED_GROW_SIZE) != B_OK)
1866 				dprintf("heap_grower: failed to create new grow heap area\n");
1867 		}
1868 
1869 		for (uint32 i = 0; i < sHeapCount; i++) {
1870 			heap_allocator *heap = sHeaps[i];
1871 			if (sLastGrowRequest[i] > sLastHandledGrowRequest[i]
1872 				|| heap_should_grow(heap)) {
1873 				// grow this heap if it is nearly full or if a grow was
1874 				// explicitly requested for this heap (happens when a large
1875 				// allocation cannot be fulfilled due to lack of contiguous
1876 				// pages)
1877 				if (heap_create_new_heap_area(heap, "additional heap",
1878 						HEAP_GROW_SIZE) != B_OK)
1879 					dprintf("heap_grower: failed to create new heap area\n");
1880 				sLastHandledGrowRequest[i] = sLastGrowRequest[i];
1881 			}
1882 		}
1883 
1884 		// notify anyone waiting for this request
1885 		release_sem_etc(sHeapGrownNotify, -1, B_RELEASE_ALL);
1886 	}
1887 
1888 	return 0;
1889 }
1890 
1891 
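// Sets up the initial kernel heaps on the given address range, splitting it
// between the heap classes according to their initial_percentage, and
// registers the heap related debugger commands.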
1892 status_t
1893 heap_init(addr_t base, size_t size)
1894 {
1895 	for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) {
1896 		size_t partSize = size * sHeapClasses[i].initial_percentage / 100;
1897 		sHeaps[i] = heap_create_allocator(sHeapClasses[i].name, base, partSize,
1898 			&sHeapClasses[i], false);
1899 		sLastGrowRequest[i] = sLastHandledGrowRequest[i] = 0;
1900 		base += partSize;
1901 		sHeapCount++;
1902 	}
1903 
1904 	// set up some debug commands
	add_debugger_command_etc("heap", &dump_heap_list,
		"Dump info about the kernel heap(s)",
		"[(\"grow\" | \"stats\" | <heap>)]\n"
		"Dump info about the kernel heap(s). If \"grow\" is specified, only\n"
		"info about the dedicated grow heap is printed. If \"stats\" is\n"
		"given as the argument, currently only the heap count is printed.\n"
		"If <heap> is given, it is interpreted as the address of the heap to\n"
		"print info about.\n", 0);
1913 #if !KERNEL_HEAP_LEAK_CHECK
1914 	add_debugger_command_etc("allocations", &dump_allocations,
1915 		"Dump current heap allocations",
1916 		"[\"stats\"] [<heap>]\n"
		"If no parameters are given, all current allocations are dumped.\n"
		"If the optional argument \"stats\" is specified, only the allocation\n"
		"counts and no individual allocations are printed.\n"
		"If a specific heap address is given, only allocations of this\n"
		"allocator are dumped.\n", 0);
1922 #else // !KERNEL_HEAP_LEAK_CHECK
1923 	add_debugger_command_etc("allocations", &dump_allocations,
1924 		"Dump current heap allocations",
1925 		"[(\"team\" | \"thread\") <id>] [ \"caller\" <address> ] [\"stats\"]\n"
		"If no parameters are given, all current allocations are dumped.\n"
		"If \"team\", \"thread\", and/or \"caller\" is specified as the first\n"
		"argument, only allocations matching the team id, thread id, or\n"
		"caller address given in the second argument are printed.\n"
		"If the optional argument \"stats\" is specified, only the allocation\n"
		"counts and no individual allocations are printed.\n", 0);
1932 	add_debugger_command_etc("allocations_per_caller",
1933 		&dump_allocations_per_caller,
1934 		"Dump current heap allocations summed up per caller",
1935 		"[ \"-c\" ] [ -h <heap> ]\n"
		"The current allocations will be summed up by caller (their count and\n"
		"size), printed in decreasing order by size or, if \"-c\" is\n"
		"specified, by allocation count. If given, <heap> specifies the\n"
		"address of the heap for which to print the allocations.\n", 0);
1940 #endif // KERNEL_HEAP_LEAK_CHECK
1941 	return B_OK;
1942 }
1943 
1944 
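// With the paranoid options enabled, the address ranges around the 0xcc and
// 0xdeadbeef fill patterns are blocked off, so that dereferencing a pointer
// read from uninitialized or already freed heap memory faults immediately.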
1945 status_t
1946 heap_init_post_sem()
1947 {
1948 #if PARANOID_KERNEL_MALLOC
1949 	vm_block_address_range("uninitialized heap memory",
1950 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
1951 #endif
1952 #if PARANOID_KERNEL_FREE
1953 	vm_block_address_range("freed heap memory",
1954 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
1955 #endif
1956 
1957 	sHeapGrowSem = create_sem(0, "heap_grow_sem");
1958 	if (sHeapGrowSem < 0) {
1959 		panic("heap_init_post_sem(): failed to create heap grow sem\n");
1960 		return B_ERROR;
1961 	}
1962 
1963 	sHeapGrownNotify = create_sem(0, "heap_grown_notify");
1964 	if (sHeapGrownNotify < 0) {
1965 		panic("heap_init_post_sem(): failed to create heap grown notify sem\n");
1966 		return B_ERROR;
1967 	}
1968 
1969 	return B_OK;
1970 }
1971 
1972 
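// Creates the dedicated grow heap and the grower thread, optionally adds
// per-CPU heaps on machines with enough memory, and registers the deferred
// deleter as a kernel daemon.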
1973 status_t
1974 heap_init_post_thread()
1975 {
1976 	void *address = NULL;
1977 	area_id growHeapArea = create_area("dedicated grow heap", &address,
1978 		B_ANY_KERNEL_BLOCK_ADDRESS, HEAP_DEDICATED_GROW_SIZE, B_FULL_LOCK,
1979 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1980 	if (growHeapArea < 0) {
		panic("heap_init_post_thread(): couldn't allocate dedicated grow heap "
1982 			"area");
1983 		return growHeapArea;
1984 	}
1985 
1986 	sGrowHeap = heap_create_allocator("grow", (addr_t)address,
1987 		HEAP_DEDICATED_GROW_SIZE, &sHeapClasses[0], false);
1988 	if (sGrowHeap == NULL) {
1989 		panic("heap_init_post_thread(): failed to create dedicated grow heap\n");
1990 		return B_ERROR;
1991 	}
1992 
1993 	sHeapGrowThread = spawn_kernel_thread(heap_grow_thread, "heap grower",
1994 		B_URGENT_PRIORITY, NULL);
1995 	if (sHeapGrowThread < 0) {
1996 		panic("heap_init_post_thread(): cannot create heap grow thread\n");
1997 		return sHeapGrowThread;
1998 	}
1999 
2000 	// create per-cpu heaps if there's enough memory
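	// (the formula below allows one additional per-CPU heap set per
	// 60 * 1024 pages, i.e. roughly every 240 MB of RAM with 4 kB pages)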
2001 	int32 heapCount = MIN(smp_get_num_cpus(),
2002 		(int32)vm_page_num_pages() / 60 / 1024);
2003 	for (int32 i = 1; i < heapCount; i++) {
2004 		addr_t base = 0;
2005 		size_t size = HEAP_GROW_SIZE * HEAP_CLASS_COUNT;
		area_id perCPUHeapArea = create_area("per cpu initial heap",
2007 			(void **)&base, B_ANY_KERNEL_ADDRESS, size, B_FULL_LOCK,
2008 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2009 		if (perCPUHeapArea < 0)
2010 			break;
2011 
2012 		for (uint32 j = 0; j < HEAP_CLASS_COUNT; j++) {
2013 			int32 heapIndex = i * HEAP_CLASS_COUNT + j;
2014 			size_t partSize = size * sHeapClasses[j].initial_percentage / 100;
2015 			sHeaps[heapIndex] = heap_create_allocator(sHeapClasses[j].name,
2016 				base, partSize, &sHeapClasses[j], false);
2017 			sLastGrowRequest[heapIndex] = 0;
2018 			sLastHandledGrowRequest[heapIndex] = 0;
2019 			base += partSize;
2020 			sHeapCount++;
2021 		}
2022 	}
2023 
2024 	// run the deferred deleter roughly once a second
2025 	if (register_kernel_daemon(deferred_deleter, NULL, 10) != B_OK)
2026 		panic("heap_init_post_thread(): failed to init deferred deleter");
2027 
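	// the grow thread was spawned in suspended state; SIGCONT gets it running
	// without forcing an immediate reschedule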
2028 	send_signal_etc(sHeapGrowThread, SIGCONT, B_DO_NOT_RESCHEDULE);
2029 	return B_OK;
2030 }
2031 
2032 
2033 //	#pragma mark - Public API
2034 
2035 
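// Kernel memalign(): very large requests (above HEAP_AREA_USE_THRESHOLD) get
// their own area with an area_allocation_info header; everything else is
// served from the per-CPU heaps, starting with the current CPU's heap for
// the matching size class. If no heap can satisfy the request, a grow is
// requested from the grower thread and the allocation is retried once.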
2036 void *
2037 memalign(size_t alignment, size_t size)
2038 {
2039 	if (!gKernelStartup && !are_interrupts_enabled()) {
2040 		panic("memalign(): called with interrupts disabled\n");
2041 		return NULL;
2042 	}
2043 
2044 	if (!gKernelStartup && size > HEAP_AREA_USE_THRESHOLD) {
2045 		// don't even attempt such a huge allocation - use areas instead
2046 		size_t areaSize = ROUNDUP(size + sizeof(area_allocation_info)
2047 			+ alignment, B_PAGE_SIZE);
2048 		if (areaSize < size) {
2049 			// the size overflowed
2050 			return NULL;
2051 		}
2052 
2053 		void *address = NULL;
2054 		area_id allocationArea = create_area("memalign area", &address,
2055 			B_ANY_KERNEL_BLOCK_ADDRESS, areaSize, B_FULL_LOCK,
2056 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2057 		if (allocationArea < B_OK) {
2058 			dprintf("heap: failed to create area for huge allocation\n");
2059 			return NULL;
2060 		}
2061 
2062 		area_allocation_info *info = (area_allocation_info *)address;
2063 		info->magic = kAreaAllocationMagic;
2064 		info->area = allocationArea;
2065 		info->base = address;
2066 		info->size = areaSize;
2067 		info->allocation_size = size;
2068 		info->allocation_alignment = alignment;
2069 
2070 		address = (void *)((addr_t)address + sizeof(area_allocation_info));
2071 		if (alignment != 0) {
			// align first, then verify the result still fits into the area
			address = (void *)ROUNDUP((addr_t)address, alignment);
			ASSERT((addr_t)address % alignment == 0);
			ASSERT((addr_t)address + size - 1 < (addr_t)info + areaSize - 1);
2075 		}
2076 
2077 		TRACE(("heap: allocated area %ld for huge allocation of %lu bytes\n",
2078 			allocationArea, size));
2079 
2080 		info->allocation_base = address;
2081 
2082 #if PARANOID_KERNEL_MALLOC
2083 		memset(address, 0xcc, size);
2084 #endif
2085 		return address;
2086 	}
2087 
2088 	void *result = NULL;
2089 	bool shouldGrow = false;
2090 	int32 cpuCount = MIN(smp_get_num_cpus(),
2091 		(int32)sHeapCount / HEAP_CLASS_COUNT);
2092 	int32 cpuNumber = smp_get_current_cpu();
2093 	for (int32 i = 0; i < cpuCount; i++) {
2094 		uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount);
2095 		heap_allocator *heap = sHeaps[heapIndex];
		result = heap_memalign(heap, alignment, size);
		if (result != NULL) {
			shouldGrow = heap_should_grow(heap);
#if PARANOID_HEAP_VALIDATION
			heap_validate_heap(heap);
#endif
			break;
		}
2101 	}
2102 
2103 	if (result == NULL) {
		// request an urgent grow and wait - we don't grow the heap ourselves
		// here so that growing stays serialized through the grow thread;
		// otherwise multiple threads hitting this situation (likely when
		// memory has run out) would all add areas
2108 		uint32 heapIndex = heap_index_for(size, smp_get_current_cpu());
2109 		sLastGrowRequest[heapIndex]++;
2110 		switch_sem(sHeapGrowSem, sHeapGrownNotify);
2111 
2112 		// and then try again
2113 		result = heap_memalign(sHeaps[heapIndex], alignment, size);
2114 	} else if (shouldGrow) {
2115 		// should grow sometime soon, notify the grower
2116 		release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
2117 	}
2118 
2123 	if (result == NULL)
2124 		panic("heap: kernel heap has run out of memory\n");
2125 	return result;
2126 }
2127 
2128 
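// Allocation path that never waits for the heaps to grow. The grow thread is
// served from the dedicated grow heap (and asynchronously requests a new one
// when that runs low); everyone else just gets whatever the existing heaps
// can still provide.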
2129 void *
2130 memalign_nogrow(size_t alignment, size_t size)
2131 {
2132 	// use dedicated memory in the grow thread by default
2133 	if (thread_get_current_thread_id() == sHeapGrowThread) {
2134 		void *result = heap_memalign(sGrowHeap, alignment, size);
2135 		if (!sAddGrowHeap && heap_should_grow(sGrowHeap)) {
2136 			// hopefully the heap grower will manage to create a new heap
2137 			// before running out of private memory...
2138 			dprintf("heap: requesting new grow heap\n");
2139 			sAddGrowHeap = true;
2140 			release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
2141 		}
2142 
2143 		if (result != NULL)
2144 			return result;
2145 	}
2146 
2147 	// try public memory, there might be something available
2148 	void *result = NULL;
2149 	int32 cpuCount = MIN(smp_get_num_cpus(),
2150 		(int32)sHeapCount / HEAP_CLASS_COUNT);
2151 	int32 cpuNumber = smp_get_current_cpu();
2152 	for (int32 i = 0; i < cpuCount; i++) {
2153 		uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount);
2154 		heap_allocator *heap = sHeaps[heapIndex];
2155 		result = heap_memalign(heap, alignment, size);
2156 		if (result != NULL)
2157 			return result;
2158 	}
2159 
2160 	// no memory available
2161 	if (thread_get_current_thread_id() == sHeapGrowThread)
2162 		panic("heap: all heaps have run out of memory while growing\n");
2163 	else
2164 		dprintf("heap: all heaps have run out of memory\n");
2165 
2166 	return NULL;
2167 }
2168 
2169 
2170 void *
2171 malloc_nogrow(size_t size)
2172 {
2173 	return memalign_nogrow(0, size);
2174 }
2175 
2176 
2177 void *
2178 malloc(size_t size)
2179 {
2180 	return memalign(0, size);
2181 }
2182 
2183 
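// Returns the given address to whichever allocator owns it: the per-CPU
// heaps are tried first (starting with the current CPU's), then the
// dedicated grow heap, and finally area backed huge allocations, which are
// simply deleted together with their area.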
2184 void
2185 free(void *address)
2186 {
2187 	if (!gKernelStartup && !are_interrupts_enabled()) {
2188 		panic("free(): called with interrupts disabled\n");
2189 		return;
2190 	}
2191 
2192 	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2193 	for (uint32 i = 0; i < sHeapCount; i++) {
2194 		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2195 		if (heap_free(heap, address) == B_OK) {
2196 #if PARANOID_HEAP_VALIDATION
2197 			heap_validate_heap(heap);
2198 #endif
2199 			return;
2200 		}
2201 	}
2202 
2203 	// maybe it was allocated from the dedicated grow heap
2204 	if (heap_free(sGrowHeap, address) == B_OK)
2205 		return;
2206 
2207 	// or maybe it was a huge allocation using an area
2208 	area_info areaInfo;
2209 	area_id area = area_for(address);
2210 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2211 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2212 
2213 		// just make extra sure it was allocated by us
2214 		if (info->magic == kAreaAllocationMagic && info->area == area
2215 			&& info->size == areaInfo.size && info->base == areaInfo.address
2216 			&& info->allocation_size < areaInfo.size) {
2217 			delete_area(area);
2218 			TRACE(("free(): freed huge allocation by deleting area %ld\n",
2219 				area));
2220 			return;
2221 		}
2222 	}
2223 
2224 	panic("free(): free failed for address %p\n", address);
2225 }
2226 
2227 
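// Resizes an allocation, handling the same three ownership cases as free().
// For area backed huge allocations the existing area is reused if it is
// still large enough; otherwise a new block is allocated, the data copied
// over and the old area deleted.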
2228 void *
2229 realloc(void *address, size_t newSize)
2230 {
2231 	if (!gKernelStartup && !are_interrupts_enabled()) {
2232 		panic("realloc(): called with interrupts disabled\n");
2233 		return NULL;
2234 	}
2235 
2236 	if (address == NULL)
2237 		return memalign(0, newSize);
2238 
2239 	if (newSize == 0) {
2240 		free(address);
2241 		return NULL;
2242 	}
2243 
2244 	void *newAddress = NULL;
2245 	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2246 	for (uint32 i = 0; i < sHeapCount; i++) {
2247 		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2248 		if (heap_realloc(heap, address, &newAddress, newSize) == B_OK) {
2249 #if PARANOID_HEAP_VALIDATION
2250 			heap_validate_heap(heap);
2251 #endif
2252 			return newAddress;
2253 		}
2254 	}
2255 
2256 	// maybe it was allocated from the dedicated grow heap
2257 	if (heap_realloc(sGrowHeap, address, &newAddress, newSize) == B_OK)
2258 		return newAddress;
2259 
2260 	// or maybe it was a huge allocation using an area
2261 	area_info areaInfo;
2262 	area_id area = area_for(address);
2263 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2264 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2265 
2266 		// just make extra sure it was allocated by us
2267 		if (info->magic == kAreaAllocationMagic && info->area == area
2268 			&& info->size == areaInfo.size && info->base == areaInfo.address
2269 			&& info->allocation_size < areaInfo.size) {
2270 			size_t available = info->size - ((addr_t)info->allocation_base
2271 				- (addr_t)info->base);
2272 
2273 			if (available >= newSize) {
2274 				// there is enough room available for the newSize
2275 				TRACE(("realloc(): new size %ld fits in old area %ld with %ld "
2276 					"available\n", newSize, area, available));
2277 				info->allocation_size = newSize;
2278 				return address;
2279 			}
2280 
2281 			// have to allocate/copy/free - TODO maybe resize the area instead?
2282 			newAddress = memalign(0, newSize);
2283 			if (newAddress == NULL) {
2284 				dprintf("realloc(): failed to allocate new block of %ld bytes\n",
2285 					newSize);
2286 				return NULL;
2287 			}
2288 
2289 			memcpy(newAddress, address, min_c(newSize, info->allocation_size));
2290 			delete_area(area);
2291 			TRACE(("realloc(): allocated new block %p for size %ld and deleted "
2292 				"old area %ld\n", newAddress, newSize, area));
2293 			return newAddress;
2294 		}
2295 	}
2296 
2297 	panic("realloc(): failed to realloc address %p to size %lu\n", address,
2298 		newSize);
2299 	return NULL;
2300 }
2301 
2302 
2303 void *
2304 calloc(size_t numElements, size_t size)
{
	// guard against overflow when computing the total allocation size
	if (size != 0 && numElements > ((size_t)-1) / size)
		return NULL;

	void *address = memalign(0, numElements * size);
	if (address != NULL)
		memset(address, 0, numElements * size);

	return address;
2311 }
2312 
2313 
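// Queues a block to be freed later by the deferred deleter daemon. The block
// itself is reused (via placement new) to store the list link, so it must be
// at least sizeof(DeferredFreeListEntry) bytes large. This makes it usable
// from contexts where free() must not be called, e.g. with interrupts
// disabled.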
2314 void
2315 deferred_free(void *block)
2316 {
2317 	if (block == NULL)
2318 		return;
2319 
2320 	DeferredFreeListEntry *entry = new(block) DeferredFreeListEntry;
2321 
2322 	InterruptsSpinLocker _(sDeferredFreeListLock);
2323 	sDeferredFreeList.Add(entry);
2324 }
2325 
2326 
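// The malloc_referenced*() functions implement a simple reference counted
// allocation: an int32 reference count is stored in the four bytes directly
// preceding the pointer handed out to the caller.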
2327 void *
2328 malloc_referenced(size_t size)
2329 {
2330 	int32 *referencedData = (int32 *)malloc(size + 4);
2331 	if (referencedData == NULL)
2332 		return NULL;
2333 
2334 	*referencedData = 1;
2335 	return referencedData + 1;
2336 }
2337 
2338 
2339 void *
2340 malloc_referenced_acquire(void *data)
2341 {
2342 	if (data != NULL) {
2343 		int32 *referencedData = (int32 *)data - 1;
2344 		atomic_add(referencedData, 1);
2345 	}
2346 
2347 	return data;
2348 }
2349 
2350 
2351 void
2352 malloc_referenced_release(void *data)
2353 {
2354 	if (data == NULL)
2355 		return;
2356 
2357 	int32 *referencedData = (int32 *)data - 1;
	// atomic_add() returns the previous value; free when the last reference
	// (count == 1) is released
	if (atomic_add(referencedData, -1) == 1)
2359 		free(referencedData);
2360 }
2361 
2362 
2363 DeferredDeletable::~DeferredDeletable()
2364 {
2365 }
2366 
2367 
2368 void
2369 deferred_delete(DeferredDeletable *deletable)
2370 {
2371 	if (deletable == NULL)
2372 		return;
2373 
2374 	InterruptsSpinLocker _(sDeferredFreeListLock);
2375 	sDeferredDeletableList.Add(deletable);
2376 }
2377