xref: /haiku/src/system/kernel/heap.cpp (revision 4958c5d7b68abe13db5fbfa61305db81693f0eef)
1 /*
2  * Copyright 2008-2010, Michael Lotz, mmlr@mlotz.ch.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <arch/debug.h>
12 #include <debug.h>
13 #include <elf.h>
14 #include <heap.h>
15 #include <int.h>
16 #include <kernel.h>
17 #include <lock.h>
18 #include <string.h>
19 #include <team.h>
20 #include <thread.h>
21 #include <tracing.h>
22 #include <util/AutoLock.h>
23 #include <vm/vm.h>
24 #include <vm/vm_page.h>
25 
26 
27 //#define TRACE_HEAP
28 #ifdef TRACE_HEAP
29 #	define TRACE(x) dprintf x
30 #else
31 #	define TRACE(x) ;
32 #endif
33 
34 
35 #if !USE_DEBUG_HEAP_FOR_MALLOC
36 #	undef KERNEL_HEAP_LEAK_CHECK
37 #endif
38 
39 
40 #if KERNEL_HEAP_LEAK_CHECK
41 typedef struct heap_leak_check_info_s {
42 	addr_t		caller;
43 	size_t		size;
44 	thread_id	thread;
45 	team_id		team;
46 } heap_leak_check_info;
47 
48 struct caller_info {
49 	addr_t		caller;
50 	uint32		count;
51 	uint32		size;
52 };
53 
54 static const int32 kCallerInfoTableSize = 1024;
55 static caller_info sCallerInfoTable[kCallerInfoTableSize];
56 static int32 sCallerInfoCount = 0;
57 #endif	// KERNEL_HEAP_LEAK_CHECK
58 
59 
60 typedef struct heap_page_s heap_page;
61 
62 
63 typedef struct heap_area_s {
64 	area_id			area;
65 
66 	addr_t			base;
67 	size_t			size;
68 
69 	uint32			page_count;
70 	uint32			free_page_count;
71 
72 	heap_page *		free_pages;
73 	heap_page *		page_table;
74 
75 	heap_area_s *	prev;
76 	heap_area_s *	next;
77 	heap_area_s *	all_next;
78 } heap_area;
79 
80 
81 #define MAX_BIN_COUNT	31	// depends on the size of the bin_index field
82 
83 typedef struct heap_page_s {
84 	heap_area *		area;
85 	uint16			index;
86 	uint16			bin_index : 5;
87 	uint16			free_count : 10;
88 	uint16			in_use : 1;
89 	heap_page_s *	next;
90 	heap_page_s *	prev;
91 	union {
92 		uint16			empty_index;
93 		uint16			allocation_id; // used for bin == bin_count allocations
94 	};
95 	addr_t *		free_list;
96 } heap_page;
97 
98 
99 typedef struct heap_bin_s {
100 	mutex		lock;
101 	uint32		element_size;
102 	uint16		max_free_count;
103 	heap_page *	page_list; // sorted so that the desired page is always first
104 } heap_bin;
105 
106 
107 struct heap_allocator_s {
108 	rw_lock		area_lock;
109 	mutex		page_lock;
110 
111 	const char *name;
112 	uint32		bin_count;
113 	uint32		page_size;
114 
115 	uint32		total_pages;
116 	uint32		total_free_pages;
117 	uint32		empty_areas;
118 
119 #if KERNEL_HEAP_LEAK_CHECK
120 	addr_t		(*get_caller)();
121 #endif
122 
123 	heap_bin *	bins;
124 	heap_area *	areas; // sorted so that the desired area is always first
125 	heap_area *	all_areas; // all areas including full ones
126 };
127 
128 
129 static const uint32 kAreaAllocationMagic = 'AAMG';
130 typedef struct area_allocation_info_s {
131 	area_id		area;
132 	void *		base;
133 	uint32		magic;
134 	size_t		size;
135 	size_t		allocation_size;
136 	size_t		allocation_alignment;
137 	void *		allocation_base;
138 } area_allocation_info;
139 
140 
141 struct DeferredFreeListEntry : SinglyLinkedListLinkImpl<DeferredFreeListEntry> {
142 };
143 
144 
145 typedef SinglyLinkedList<DeferredFreeListEntry> DeferredFreeList;
146 typedef SinglyLinkedList<DeferredDeletable> DeferredDeletableList;
147 
148 
149 #if USE_DEBUG_HEAP_FOR_MALLOC
150 
151 #define VIP_HEAP_SIZE	1024 * 1024
152 
153 // Heap class configuration
154 #define HEAP_CLASS_COUNT 3
155 static const heap_class sHeapClasses[HEAP_CLASS_COUNT] = {
156 	{
157 		"small",					/* name */
158 		50,							/* initial percentage */
159 		B_PAGE_SIZE / 8,			/* max allocation size */
160 		B_PAGE_SIZE,				/* page size */
161 		8,							/* min bin size */
162 		4,							/* bin alignment */
163 		8,							/* min count per page */
164 		16							/* max waste per page */
165 	},
166 	{
167 		"medium",					/* name */
168 		30,							/* initial percentage */
169 		B_PAGE_SIZE * 2,			/* max allocation size */
170 		B_PAGE_SIZE * 8,			/* page size */
171 		B_PAGE_SIZE / 8,			/* min bin size */
172 		32,							/* bin alignment */
173 		4,							/* min count per page */
174 		64							/* max waste per page */
175 	},
176 	{
177 		"large",					/* name */
178 		20,							/* initial percentage */
179 		HEAP_AREA_USE_THRESHOLD,	/* max allocation size */
180 		B_PAGE_SIZE * 16,			/* page size */
181 		B_PAGE_SIZE * 2,			/* min bin size */
182 		128,						/* bin alignment */
183 		1,							/* min count per page */
184 		256							/* max waste per page */
185 	}
186 };
187 
188 
189 static uint32 sHeapCount;
190 static heap_allocator *sHeaps[HEAP_CLASS_COUNT * SMP_MAX_CPUS];
191 static uint32 *sLastGrowRequest[HEAP_CLASS_COUNT * SMP_MAX_CPUS];
192 static uint32 *sLastHandledGrowRequest[HEAP_CLASS_COUNT * SMP_MAX_CPUS];
193 
194 static heap_allocator *sVIPHeap;
195 static heap_allocator *sGrowHeap = NULL;
196 static thread_id sHeapGrowThread = -1;
197 static sem_id sHeapGrowSem = -1;
198 static sem_id sHeapGrownNotify = -1;
199 static bool sAddGrowHeap = false;
200 
201 #endif	// USE_DEBUG_HEAP_FOR_MALLOC
202 
203 static DeferredFreeList sDeferredFreeList;
204 static DeferredDeletableList sDeferredDeletableList;
205 static spinlock sDeferredFreeListLock;
206 
207 
208 
209 // #pragma mark - Tracing
210 
211 #if KERNEL_HEAP_TRACING
212 namespace KernelHeapTracing {
213 
214 class Allocate : public AbstractTraceEntry {
215 	public:
216 		Allocate(addr_t address, size_t size)
217 			:	fAddress(address),
218 				fSize(size)
219 		{
220 			Initialized();
221 		}
222 
223 		virtual void AddDump(TraceOutput &out)
224 		{
225 			out.Print("heap allocate: 0x%08lx (%lu bytes)", fAddress, fSize);
226 		}
227 
228 	private:
229 		addr_t	fAddress;
230 		size_t	fSize;
231 };
232 
233 
234 class Reallocate : public AbstractTraceEntry {
235 	public:
236 		Reallocate(addr_t oldAddress, addr_t newAddress, size_t newSize)
237 			:	fOldAddress(oldAddress),
238 				fNewAddress(newAddress),
239 				fNewSize(newSize)
240 		{
241 			Initialized();
242 		};
243 
244 		virtual void AddDump(TraceOutput &out)
245 		{
246 			out.Print("heap reallocate: 0x%08lx -> 0x%08lx (%lu bytes)",
247 				fOldAddress, fNewAddress, fNewSize);
248 		}
249 
250 	private:
251 		addr_t	fOldAddress;
252 		addr_t	fNewAddress;
253 		size_t	fNewSize;
254 };
255 
256 
257 class Free : public AbstractTraceEntry {
258 	public:
259 		Free(addr_t address)
260 			:	fAddress(address)
261 		{
262 			Initialized();
263 		};
264 
265 		virtual void AddDump(TraceOutput &out)
266 		{
267 			out.Print("heap free: 0x%08lx", fAddress);
268 		}
269 
270 	private:
271 		addr_t	fAddress;
272 };
273 
274 
275 } // namespace KernelHeapTracing
276 
277 #	define T(x)	if (!gKernelStartup) new(std::nothrow) KernelHeapTracing::x;
278 #else
279 #	define T(x)	;
280 #endif
281 
282 
283 // #pragma mark - Debug functions
284 
285 
286 #if KERNEL_HEAP_LEAK_CHECK
287 static addr_t
288 get_caller()
289 {
290 	// Find the first return address outside of the allocator code. Note, that
291 	// this makes certain assumptions about how the code for the functions
292 	// ends up in the kernel object.
293 	addr_t returnAddresses[5];
294 	int32 depth = arch_debug_get_stack_trace(returnAddresses, 5, 0, 1,
295 		STACK_TRACE_KERNEL);
296 	for (int32 i = 0; i < depth; i++) {
297 		if (returnAddresses[i] < (addr_t)&get_caller
298 			|| returnAddresses[i] > (addr_t)&deferred_delete) {
299 			return returnAddresses[i];
300 		}
301 	}
302 
303 	return 0;
304 }
305 #endif
306 
307 
308 static void
309 dump_page(heap_page *page)
310 {
311 	uint32 count = 0;
312 	for (addr_t *temp = page->free_list; temp != NULL; temp = (addr_t *)*temp)
313 		count++;
314 
315 	kprintf("\t\tpage %p: bin_index: %u; free_count: %u; empty_index: %u; "
316 		"free_list %p (%" B_PRIu32 " entr%s)\n", page, page->bin_index,
317 		page->free_count, page->empty_index, page->free_list, count,
318 		count == 1 ? "y" : "ies");
319 }
320 
321 
322 static void
323 dump_bin(heap_bin *bin)
324 {
325 	uint32 count = 0;
326 	for (heap_page *page = bin->page_list; page != NULL; page = page->next)
327 		count++;
328 
329 	kprintf("\telement_size: %" B_PRIu32 "; max_free_count: %u; page_list %p "
330 		"(%" B_PRIu32 " pages);\n", bin->element_size, bin->max_free_count,
331 		bin->page_list, count);
332 
333 	for (heap_page *page = bin->page_list; page != NULL; page = page->next)
334 		dump_page(page);
335 }
336 
337 
338 static void
339 dump_bin_list(heap_allocator *heap)
340 {
341 	for (uint32 i = 0; i < heap->bin_count; i++)
342 		dump_bin(&heap->bins[i]);
343 	kprintf("\n");
344 }
345 
346 
347 static void
348 dump_allocator_areas(heap_allocator *heap)
349 {
350 	heap_area *area = heap->all_areas;
351 	while (area) {
352 		kprintf("\tarea %p: area: %" B_PRId32 "; base: %p; size: %zu; page_count: "
353 			"%" B_PRIu32 "; free_pages: %p (%" B_PRIu32 " entr%s)\n", area,
354 			area->area, (void *)area->base, area->size, area->page_count,
355 			area->free_pages, area->free_page_count,
356 			area->free_page_count == 1 ? "y" : "ies");
357 		area = area->all_next;
358 	}
359 
360 	kprintf("\n");
361 }
362 
363 
364 static void
365 dump_allocator(heap_allocator *heap, bool areas, bool bins)
366 {
367 	kprintf("allocator %p: name: %s; page_size: %" B_PRIu32 "; bin_count: "
368 		"%" B_PRIu32 "; pages: %" B_PRIu32 "; free_pages: %" B_PRIu32 "; "
369 		"empty_areas: %" B_PRIu32 "\n", heap, heap->name, heap->page_size,
370 		heap->bin_count, heap->total_pages, heap->total_free_pages,
371 		heap->empty_areas);
372 
373 	if (areas)
374 		dump_allocator_areas(heap);
375 	if (bins)
376 		dump_bin_list(heap);
377 }
378 
379 
380 static int
381 dump_heap_list(int argc, char **argv)
382 {
383 #if USE_DEBUG_HEAP_FOR_MALLOC
384 	if (argc == 2 && strcmp(argv[1], "grow") == 0) {
385 		// only dump dedicated grow heap info
386 		kprintf("dedicated grow heap:\n");
387 		dump_allocator(sGrowHeap, true, true);
388 		return 0;
389 	}
390 #endif
391 
392 	bool stats = false;
393 	int i = 1;
394 
395 	if (strcmp(argv[1], "stats") == 0) {
396 		stats = true;
397 		i++;
398 	}
399 
400 	uint64 heapAddress = 0;
401 	if (i < argc && !evaluate_debug_expression(argv[i], &heapAddress, true)) {
402 		print_debugger_command_usage(argv[0]);
403 		return 0;
404 	}
405 
406 	if (heapAddress == 0) {
407 #if USE_DEBUG_HEAP_FOR_MALLOC
408 		// dump default kernel heaps
409 		for (uint32 i = 0; i < sHeapCount; i++)
410 			dump_allocator(sHeaps[i], !stats, !stats);
411 #else
412 		print_debugger_command_usage(argv[0]);
413 #endif
414 	} else {
415 		// dump specified heap
416 		dump_allocator((heap_allocator*)(addr_t)heapAddress, !stats, !stats);
417 	}
418 
419 	return 0;
420 }
421 
422 
423 #if !KERNEL_HEAP_LEAK_CHECK
424 
425 static int
426 dump_allocations(int argc, char **argv)
427 {
428 	uint64 heapAddress = 0;
429 	bool statsOnly = false;
430 	for (int32 i = 1; i < argc; i++) {
431 		if (strcmp(argv[i], "stats") == 0)
432 			statsOnly = true;
433 		else if (!evaluate_debug_expression(argv[i], &heapAddress, true)) {
434 			print_debugger_command_usage(argv[0]);
435 			return 0;
436 		}
437 	}
438 
439 	size_t totalSize = 0;
440 	uint32 totalCount = 0;
441 #if USE_DEBUG_HEAP_FOR_MALLOC
442 	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
443 		heap_allocator *heap = sHeaps[heapIndex];
444 		if (heapAddress != 0)
445 			heap = (heap_allocator *)(addr_t)heapAddress;
446 #else
447 	while (true) {
448 		heap_allocator *heap = (heap_allocator *)(addr_t)heapAddress;
449 		if (heap == NULL) {
450 			print_debugger_command_usage(argv[0]);
451 			return 0;
452 		}
453 #endif
454 #if 0
455 	}
456 #endif
457 
458 		// go through all the pages in all the areas
459 		heap_area *area = heap->all_areas;
460 		while (area) {
461 			for (uint32 i = 0; i < area->page_count; i++) {
462 				heap_page *page = &area->page_table[i];
463 				if (!page->in_use)
464 					continue;
465 
466 				addr_t base = area->base + i * heap->page_size;
467 				if (page->bin_index < heap->bin_count) {
468 					// page is used by a small allocation bin
469 					uint32 elementCount = page->empty_index;
470 					size_t elementSize
471 						= heap->bins[page->bin_index].element_size;
472 					for (uint32 j = 0; j < elementCount;
473 							j++, base += elementSize) {
474 						// walk the free list to see if this element is in use
475 						bool elementInUse = true;
476 						for (addr_t *temp = page->free_list; temp != NULL;
477 								temp = (addr_t *)*temp) {
478 							if ((addr_t)temp == base) {
479 								elementInUse = false;
480 								break;
481 							}
482 						}
483 
484 						if (!elementInUse)
485 							continue;
486 
487 						if (!statsOnly) {
488 							kprintf("address: 0x%p; size: %lu bytes\n",
489 								(void *)base, elementSize);
490 						}
491 
492 						totalSize += elementSize;
493 						totalCount++;
494 					}
495 				} else {
496 					// page is used by a big allocation, find the page count
497 					uint32 pageCount = 1;
498 					while (i + pageCount < area->page_count
499 						&& area->page_table[i + pageCount].in_use
500 						&& area->page_table[i + pageCount].bin_index
501 							== heap->bin_count
502 						&& area->page_table[i + pageCount].allocation_id
503 							== page->allocation_id)
504 						pageCount++;
505 
506 					size_t size = pageCount * heap->page_size;
507 
508 					if (!statsOnly) {
509 						kprintf("address: %p; size: %lu bytes\n", (void *)base,
510 							size);
511 					}
512 
513 					totalSize += size;
514 					totalCount++;
515 
516 					// skip the allocated pages
517 					i += pageCount - 1;
518 				}
519 			}
520 
521 			area = area->all_next;
522 		}
523 
524 		if (heapAddress != 0)
525 			break;
526 	}
527 
528 	kprintf("total allocations: %" B_PRIu32 "; total bytes: %zu\n", totalCount, totalSize);
529 	return 0;
530 }
531 
532 #else // !KERNEL_HEAP_LEAK_CHECK
533 
534 static int
535 dump_allocations(int argc, char **argv)
536 {
537 	team_id team = -1;
538 	thread_id thread = -1;
539 	addr_t caller = 0;
540 	addr_t address = 0;
541 	bool statsOnly = false;
542 
543 	for (int32 i = 1; i < argc; i++) {
544 		if (strcmp(argv[i], "team") == 0)
545 			team = parse_expression(argv[++i]);
546 		else if (strcmp(argv[i], "thread") == 0)
547 			thread = parse_expression(argv[++i]);
548 		else if (strcmp(argv[i], "caller") == 0)
549 			caller = parse_expression(argv[++i]);
550 		else if (strcmp(argv[i], "address") == 0)
551 			address = parse_expression(argv[++i]);
552 		else if (strcmp(argv[i], "stats") == 0)
553 			statsOnly = true;
554 		else {
555 			print_debugger_command_usage(argv[0]);
556 			return 0;
557 		}
558 	}
559 
560 	size_t totalSize = 0;
561 	uint32 totalCount = 0;
562 	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
563 		heap_allocator *heap = sHeaps[heapIndex];
564 
565 		// go through all the pages in all the areas
566 		heap_area *area = heap->all_areas;
567 		while (area) {
568 			heap_leak_check_info *info = NULL;
569 			for (uint32 i = 0; i < area->page_count; i++) {
570 				heap_page *page = &area->page_table[i];
571 				if (!page->in_use)
572 					continue;
573 
574 				addr_t base = area->base + i * heap->page_size;
575 				if (page->bin_index < heap->bin_count) {
576 					// page is used by a small allocation bin
577 					uint32 elementCount = page->empty_index;
578 					size_t elementSize
579 						= heap->bins[page->bin_index].element_size;
580 					for (uint32 j = 0; j < elementCount;
581 							j++, base += elementSize) {
582 						// walk the free list to see if this element is in use
583 						bool elementInUse = true;
584 						for (addr_t *temp = page->free_list; temp != NULL;
585 								temp = (addr_t *)*temp) {
586 							if ((addr_t)temp == base) {
587 								elementInUse = false;
588 								break;
589 							}
590 						}
591 
592 						if (!elementInUse)
593 							continue;
594 
595 						info = (heap_leak_check_info *)(base + elementSize
596 							- sizeof(heap_leak_check_info));
597 
598 						if ((team == -1 || info->team == team)
599 							&& (thread == -1 || info->thread == thread)
600 							&& (caller == 0 || info->caller == caller)
601 							&& (address == 0 || base == address)) {
602 							// interesting...
603 							if (!statsOnly) {
604 								kprintf("team: % 6" B_PRId32 "; thread: % 6" B_PRId32 "; "
605 									"address: 0x%08lx; size: %lu bytes; "
606 									"caller: %#lx\n", info->team, info->thread,
607 									base, info->size, info->caller);
608 							}
609 
610 							totalSize += info->size;
611 							totalCount++;
612 						}
613 					}
614 				} else {
615 					// page is used by a big allocation, find the page count
616 					uint32 pageCount = 1;
617 					while (i + pageCount < area->page_count
618 						&& area->page_table[i + pageCount].in_use
619 						&& area->page_table[i + pageCount].bin_index
620 							== heap->bin_count
621 						&& area->page_table[i + pageCount].allocation_id
622 							== page->allocation_id)
623 						pageCount++;
624 
625 					info = (heap_leak_check_info *)(base + pageCount
626 						* heap->page_size - sizeof(heap_leak_check_info));
627 
628 					if ((team == -1 || info->team == team)
629 						&& (thread == -1 || info->thread == thread)
630 						&& (caller == 0 || info->caller == caller)
631 						&& (address == 0 || base == address)) {
632 						// interesting...
633 						if (!statsOnly) {
634 							kprintf("team: % 6" B_PRId32 "; thread: % 6" B_PRId32 ";"
635 								" address: 0x%08lx; size: %lu bytes;"
636 								" caller: %#lx\n", info->team, info->thread,
637 								base, info->size, info->caller);
638 						}
639 
640 						totalSize += info->size;
641 						totalCount++;
642 					}
643 
644 					// skip the allocated pages
645 					i += pageCount - 1;
646 				}
647 			}
648 
649 			area = area->all_next;
650 		}
651 	}
652 
653 	kprintf("total allocations: %" B_PRIu32 "; total bytes: %" B_PRIuSIZE "\n",
654 		totalCount, totalSize);
655 	return 0;
656 }
657 
658 
659 static caller_info*
660 get_caller_info(addr_t caller)
661 {
662 	// find the caller info
663 	for (int32 i = 0; i < sCallerInfoCount; i++) {
664 		if (caller == sCallerInfoTable[i].caller)
665 			return &sCallerInfoTable[i];
666 	}
667 
668 	// not found, add a new entry, if there are free slots
669 	if (sCallerInfoCount >= kCallerInfoTableSize)
670 		return NULL;
671 
672 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
673 	info->caller = caller;
674 	info->count = 0;
675 	info->size = 0;
676 
677 	return info;
678 }
679 
680 
681 static int
682 caller_info_compare_size(const void* _a, const void* _b)
683 {
684 	const caller_info* a = (const caller_info*)_a;
685 	const caller_info* b = (const caller_info*)_b;
686 	return (int)(b->size - a->size);
687 }
688 
689 
690 static int
691 caller_info_compare_count(const void* _a, const void* _b)
692 {
693 	const caller_info* a = (const caller_info*)_a;
694 	const caller_info* b = (const caller_info*)_b;
695 	return (int)(b->count - a->count);
696 }
697 
698 
699 static bool
700 analyze_allocation_callers(heap_allocator *heap)
701 {
702 	// go through all the pages in all the areas
703 	heap_area *area = heap->all_areas;
704 	while (area) {
705 		heap_leak_check_info *info = NULL;
706 		for (uint32 i = 0; i < area->page_count; i++) {
707 			heap_page *page = &area->page_table[i];
708 			if (!page->in_use)
709 				continue;
710 
711 			addr_t base = area->base + i * heap->page_size;
712 			if (page->bin_index < heap->bin_count) {
713 				// page is used by a small allocation bin
714 				uint32 elementCount = page->empty_index;
715 				size_t elementSize = heap->bins[page->bin_index].element_size;
716 				for (uint32 j = 0; j < elementCount; j++, base += elementSize) {
717 					// walk the free list to see if this element is in use
718 					bool elementInUse = true;
719 					for (addr_t *temp = page->free_list; temp != NULL;
720 						temp = (addr_t *)*temp) {
721 						if ((addr_t)temp == base) {
722 							elementInUse = false;
723 							break;
724 						}
725 					}
726 
727 					if (!elementInUse)
728 						continue;
729 
730 					info = (heap_leak_check_info *)(base + elementSize
731 						- sizeof(heap_leak_check_info));
732 
733 					caller_info *callerInfo = get_caller_info(info->caller);
734 					if (callerInfo == NULL) {
735 						kprintf("out of space for caller infos\n");
736 						return false;
737 					}
738 
739 					callerInfo->count++;
740 					callerInfo->size += info->size;
741 				}
742 			} else {
743 				// page is used by a big allocation, find the page count
744 				uint32 pageCount = 1;
745 				while (i + pageCount < area->page_count
746 					&& area->page_table[i + pageCount].in_use
747 					&& area->page_table[i + pageCount].bin_index
748 						== heap->bin_count
749 					&& area->page_table[i + pageCount].allocation_id
750 						== page->allocation_id) {
751 					pageCount++;
752 				}
753 
754 				info = (heap_leak_check_info *)(base + pageCount
755 					* heap->page_size - sizeof(heap_leak_check_info));
756 
757 				caller_info *callerInfo = get_caller_info(info->caller);
758 				if (callerInfo == NULL) {
759 					kprintf("out of space for caller infos\n");
760 					return false;
761 				}
762 
763 				callerInfo->count++;
764 				callerInfo->size += info->size;
765 
766 				// skip the allocated pages
767 				i += pageCount - 1;
768 			}
769 		}
770 
771 		area = area->all_next;
772 	}
773 
774 	return true;
775 }
776 
777 
778 static int
779 dump_allocations_per_caller(int argc, char **argv)
780 {
781 	bool sortBySize = true;
782 	heap_allocator *heap = NULL;
783 
784 	for (int32 i = 1; i < argc; i++) {
785 		if (strcmp(argv[i], "-c") == 0) {
786 			sortBySize = false;
787 		} else if (strcmp(argv[i], "-h") == 0) {
788 			uint64 heapAddress;
789 			if (++i >= argc
790 				|| !evaluate_debug_expression(argv[i], &heapAddress, true)) {
791 				print_debugger_command_usage(argv[0]);
792 				return 0;
793 			}
794 
795 			heap = (heap_allocator*)(addr_t)heapAddress;
796 		} else {
797 			print_debugger_command_usage(argv[0]);
798 			return 0;
799 		}
800 	}
801 
802 	sCallerInfoCount = 0;
803 
804 	if (heap != NULL) {
805 		if (!analyze_allocation_callers(heap))
806 			return 0;
807 	} else {
808 		for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
809 			if (!analyze_allocation_callers(sHeaps[heapIndex]))
810 				return 0;
811 		}
812 	}
813 
814 	// sort the array
815 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
816 		sortBySize ? &caller_info_compare_size : &caller_info_compare_count);
817 
818 	kprintf("%" B_PRId32 " different callers, sorted by %s...\n\n",
819 		sCallerInfoCount, sortBySize ? "size" : "count");
820 
821 	kprintf("     count        size      caller\n");
822 	kprintf("----------------------------------\n");
823 	for (int32 i = 0; i < sCallerInfoCount; i++) {
824 		caller_info& info = sCallerInfoTable[i];
825 		kprintf("%10" B_PRId32 "  %10" B_PRId32 "  %#08lx", info.count, info.size, info.caller);
826 
827 		const char *symbol;
828 		const char *imageName;
829 		bool exactMatch;
830 		addr_t baseAddress;
831 
832 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
833 				&imageName, &exactMatch) == B_OK) {
834 			kprintf("  %s + 0x%lx (%s)%s\n", symbol,
835 				info.caller - baseAddress, imageName,
836 				exactMatch ? "" : " (nearest)");
837 		} else
838 			kprintf("\n");
839 	}
840 
841 	return 0;
842 }
843 
844 #endif // KERNEL_HEAP_LEAK_CHECK
845 
846 
847 #if PARANOID_HEAP_VALIDATION
848 static void
849 heap_validate_heap(heap_allocator *heap)
850 {
851 	ReadLocker areaReadLocker(heap->area_lock);
852 	for (uint32 i = 0; i < heap->bin_count; i++)
853 		mutex_lock(&heap->bins[i].lock);
854 	MutexLocker pageLocker(heap->page_lock);
855 
856 	uint32 totalPageCount = 0;
857 	uint32 totalFreePageCount = 0;
858 	heap_area *area = heap->all_areas;
859 	while (area != NULL) {
860 		// validate the free pages list
861 		uint32 freePageCount = 0;
862 		heap_page *lastPage = NULL;
863 		heap_page *page = area->free_pages;
864 		while (page) {
865 			if ((addr_t)page < (addr_t)&area->page_table[0]
866 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
867 				panic("free page is not part of the page table\n");
868 
869 			if (page->index >= area->page_count)
870 				panic("free page has invalid index\n");
871 
872 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
873 				panic("free page index does not lead to target page\n");
874 
875 			if (page->prev != lastPage)
876 				panic("free page entry has invalid prev link\n");
877 
878 			if (page->in_use)
879 				panic("free page marked as in use\n");
880 
881 			lastPage = page;
882 			page = page->next;
883 			freePageCount++;
884 		}
885 
886 		totalPageCount += freePageCount;
887 		totalFreePageCount += freePageCount;
888 		if (area->free_page_count != freePageCount)
889 			panic("free page count doesn't match free page list\n");
890 
891 		// validate the page table
892 		uint32 usedPageCount = 0;
893 		for (uint32 i = 0; i < area->page_count; i++) {
894 			if (area->page_table[i].in_use)
895 				usedPageCount++;
896 		}
897 
898 		totalPageCount += usedPageCount;
899 		if (freePageCount + usedPageCount != area->page_count) {
900 			panic("free pages and used pages do not add up (%lu + %lu != %lu)\n",
901 				freePageCount, usedPageCount, area->page_count);
902 		}
903 
904 		area = area->all_next;
905 	}
906 
907 	// validate the areas
908 	area = heap->areas;
909 	heap_area *lastArea = NULL;
910 	uint32 lastFreeCount = 0;
911 	while (area != NULL) {
912 		if (area->free_page_count < lastFreeCount)
913 			panic("size ordering of area list broken\n");
914 
915 		if (area->prev != lastArea)
916 			panic("area list entry has invalid prev link\n");
917 
918 		lastArea = area;
919 		lastFreeCount = area->free_page_count;
920 		area = area->next;
921 	}
922 
923 	lastArea = NULL;
924 	area = heap->all_areas;
925 	while (area != NULL) {
926 		if (lastArea != NULL && lastArea->base < area->base)
927 			panic("base ordering of all_areas list broken\n");
928 
929 		lastArea = area;
930 		area = area->all_next;
931 	}
932 
933 	// validate the bins
934 	for (uint32 i = 0; i < heap->bin_count; i++) {
935 		heap_bin *bin = &heap->bins[i];
936 		heap_page *lastPage = NULL;
937 		heap_page *page = bin->page_list;
938 		lastFreeCount = 0;
939 		while (page) {
940 			area = heap->all_areas;
941 			while (area) {
942 				if (area == page->area)
943 					break;
944 				area = area->all_next;
945 			}
946 
947 			if (area == NULL) {
948 				panic("page area not present in area list\n");
949 				page = page->next;
950 				continue;
951 			}
952 
953 			if ((addr_t)page < (addr_t)&area->page_table[0]
954 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
955 				panic("used page is not part of the page table\n");
956 
957 			if (page->index >= area->page_count)
958 				panic("used page has invalid index\n");
959 
960 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
961 				panic("used page index does not lead to target page\n");
962 
963 			if (page->prev != lastPage) {
964 				panic("used page entry has invalid prev link (%p vs %p bin "
965 					"%lu)\n", page->prev, lastPage, i);
966 			}
967 
968 			if (!page->in_use)
969 				panic("used page marked as not in use\n");
970 
971 			if (page->bin_index != i) {
972 				panic("used page with bin index %u in page list of bin %lu\n",
973 					page->bin_index, i);
974 			}
975 
976 			if (page->free_count < lastFreeCount)
977 				panic("ordering of bin page list broken\n");
978 
979 			// validate the free list
980 			uint32 freeSlotsCount = 0;
981 			addr_t *element = page->free_list;
982 			addr_t pageBase = area->base + page->index * heap->page_size;
983 			while (element) {
984 				if ((addr_t)element < pageBase
985 					|| (addr_t)element >= pageBase + heap->page_size)
986 					panic("free list entry out of page range\n");
987 
988 				if (((addr_t)element - pageBase) % bin->element_size != 0)
989 					panic("free list entry not on a element boundary\n");
990 
991 				element = (addr_t *)*element;
992 				freeSlotsCount++;
993 			}
994 
995 			uint32 slotCount = bin->max_free_count;
996 			if (page->empty_index > slotCount) {
997 				panic("empty index beyond slot count (%u with %lu slots)\n",
998 					page->empty_index, slotCount);
999 			}
1000 
1001 			freeSlotsCount += (slotCount - page->empty_index);
1002 			if (freeSlotsCount > slotCount)
1003 				panic("more free slots than fit into the page\n");
1004 
1005 			lastPage = page;
1006 			lastFreeCount = page->free_count;
1007 			page = page->next;
1008 		}
1009 	}
1010 
1011 	pageLocker.Unlock();
1012 	for (uint32 i = 0; i < heap->bin_count; i++)
1013 		mutex_unlock(&heap->bins[i].lock);
1014 	areaReadLocker.Unlock();
1015 }
1016 #endif // PARANOID_HEAP_VALIDATION
1017 
1018 
1019 // #pragma mark - Heap functions
1020 
1021 
1022 void
1023 heap_add_area(heap_allocator *heap, area_id areaID, addr_t base, size_t size)
1024 {
1025 	heap_area *area = (heap_area *)base;
1026 	area->area = areaID;
1027 
1028 	base += sizeof(heap_area);
1029 	size -= sizeof(heap_area);
1030 
1031 	uint32 pageCount = size / heap->page_size;
1032 	size_t pageTableSize = pageCount * sizeof(heap_page);
1033 	area->page_table = (heap_page *)base;
1034 	base += pageTableSize;
1035 	size -= pageTableSize;
1036 
1037 	// the rest is now actually usable memory (rounded to the next page)
1038 	area->base = ROUNDUP(base, B_PAGE_SIZE);
1039 	area->size = size & ~(B_PAGE_SIZE - 1);
1040 
1041 	// now we know the real page count
1042 	pageCount = area->size / heap->page_size;
1043 	area->page_count = pageCount;
1044 
1045 	// zero out the page table and fill in page indexes
1046 	memset((void *)area->page_table, 0, pageTableSize);
1047 	for (uint32 i = 0; i < pageCount; i++) {
1048 		area->page_table[i].area = area;
1049 		area->page_table[i].index = i;
1050 	}
1051 
1052 	// add all pages up into the free pages list
1053 	for (uint32 i = 1; i < pageCount; i++) {
1054 		area->page_table[i - 1].next = &area->page_table[i];
1055 		area->page_table[i].prev = &area->page_table[i - 1];
1056 	}
1057 	area->free_pages = &area->page_table[0];
1058 	area->free_page_count = pageCount;
1059 	area->page_table[0].prev = NULL;
1060 	area->next = NULL;
1061 
1062 	WriteLocker areaWriteLocker(heap->area_lock);
1063 	MutexLocker pageLocker(heap->page_lock);
1064 	if (heap->areas == NULL) {
1065 		// it's the only (empty) area in that heap
1066 		area->prev = NULL;
1067 		heap->areas = area;
1068 	} else {
1069 		// link in this area as the last one as it is completely empty
1070 		heap_area *lastArea = heap->areas;
1071 		while (lastArea->next != NULL)
1072 			lastArea = lastArea->next;
1073 
1074 		lastArea->next = area;
1075 		area->prev = lastArea;
1076 	}
1077 
1078 	// insert this area in the all_areas list so it stays ordered by base
1079 	if (heap->all_areas == NULL || heap->all_areas->base < area->base) {
1080 		area->all_next = heap->all_areas;
1081 		heap->all_areas = area;
1082 	} else {
1083 		heap_area *insert = heap->all_areas;
1084 		while (insert->all_next && insert->all_next->base > area->base)
1085 			insert = insert->all_next;
1086 
1087 		area->all_next = insert->all_next;
1088 		insert->all_next = area;
1089 	}
1090 
1091 	heap->total_pages += area->page_count;
1092 	heap->total_free_pages += area->free_page_count;
1093 
1094 	if (areaID >= 0) {
1095 		// this later on deletable area is yet empty - the empty count will be
1096 		// decremented as soon as this area is used for the first time
1097 		heap->empty_areas++;
1098 	}
1099 
1100 	pageLocker.Unlock();
1101 	areaWriteLocker.Unlock();
1102 
1103 	dprintf("heap_add_area: area %" B_PRId32 " added to %s heap %p - usable "
1104 		"range %p - %p\n", area->area, heap->name, heap, (void *)area->base,
1105 		(void *)(area->base + area->size));
1106 }
1107 
1108 
1109 static status_t
1110 heap_remove_area(heap_allocator *heap, heap_area *area)
1111 {
1112 	if (area->free_page_count != area->page_count) {
1113 		panic("tried removing heap area that has still pages in use");
1114 		return B_ERROR;
1115 	}
1116 
1117 	if (area->prev == NULL && area->next == NULL) {
1118 		panic("tried removing the last non-full heap area");
1119 		return B_ERROR;
1120 	}
1121 
1122 	if (heap->areas == area)
1123 		heap->areas = area->next;
1124 	if (area->prev != NULL)
1125 		area->prev->next = area->next;
1126 	if (area->next != NULL)
1127 		area->next->prev = area->prev;
1128 
1129 	if (heap->all_areas == area)
1130 		heap->all_areas = area->all_next;
1131 	else {
1132 		heap_area *previous = heap->all_areas;
1133 		while (previous) {
1134 			if (previous->all_next == area) {
1135 				previous->all_next = area->all_next;
1136 				break;
1137 			}
1138 
1139 			previous = previous->all_next;
1140 		}
1141 
1142 		if (previous == NULL)
1143 			panic("removing heap area that is not in all list");
1144 	}
1145 
1146 	heap->total_pages -= area->page_count;
1147 	heap->total_free_pages -= area->free_page_count;
1148 
1149 	dprintf("heap_remove_area: area %" B_PRId32 " with range %p - %p removed "
1150 		"from %s heap %p\n", area->area, (void *)area->base,
1151 		(void *)(area->base + area->size), heap->name, heap);
1152 
1153 	return B_OK;
1154 }
1155 
1156 
1157 heap_allocator *
1158 heap_create_allocator(const char *name, addr_t base, size_t size,
1159 	const heap_class *heapClass, bool allocateOnHeap)
1160 {
1161 	heap_allocator *heap;
1162 	if (allocateOnHeap) {
1163 		// allocate seperately on the heap
1164 		heap = (heap_allocator *)malloc(sizeof(heap_allocator)
1165 			+ sizeof(heap_bin) * MAX_BIN_COUNT);
1166 	} else {
1167 		// use up the first part of the area
1168 		heap = (heap_allocator *)base;
1169 		base += sizeof(heap_allocator);
1170 		size -= sizeof(heap_allocator);
1171 	}
1172 
1173 	heap->name = name;
1174 	heap->page_size = heapClass->page_size;
1175 	heap->total_pages = heap->total_free_pages = heap->empty_areas = 0;
1176 	heap->areas = heap->all_areas = NULL;
1177 	heap->bins = (heap_bin *)((addr_t)heap + sizeof(heap_allocator));
1178 
1179 #if KERNEL_HEAP_LEAK_CHECK
1180 	heap->get_caller = &get_caller;
1181 #endif
1182 
1183 	heap->bin_count = 0;
1184 	size_t binSize = 0, lastSize = 0;
1185 	uint32 count = heap->page_size / heapClass->min_bin_size;
1186 	for (; count >= heapClass->min_count_per_page; count--, lastSize = binSize) {
1187 		if (heap->bin_count >= MAX_BIN_COUNT)
1188 			panic("heap configuration invalid - max bin count reached\n");
1189 
1190 		binSize = (heap->page_size / count) & ~(heapClass->bin_alignment - 1);
1191 		if (binSize == lastSize)
1192 			continue;
1193 		if (heap->page_size - count * binSize > heapClass->max_waste_per_page)
1194 			continue;
1195 
1196 		heap_bin *bin = &heap->bins[heap->bin_count];
1197 		mutex_init(&bin->lock, "heap bin lock");
1198 		bin->element_size = binSize;
1199 		bin->max_free_count = heap->page_size / binSize;
1200 		bin->page_list = NULL;
1201 		heap->bin_count++;
1202 	};
1203 
1204 	if (!allocateOnHeap) {
1205 		base += heap->bin_count * sizeof(heap_bin);
1206 		size -= heap->bin_count * sizeof(heap_bin);
1207 	}
1208 
1209 	rw_lock_init(&heap->area_lock, "heap area rw lock");
1210 	mutex_init(&heap->page_lock, "heap page lock");
1211 
1212 	heap_add_area(heap, -1, base, size);
1213 	return heap;
1214 }
1215 
1216 
1217 static inline void
1218 heap_free_pages_added(heap_allocator *heap, heap_area *area, uint32 pageCount)
1219 {
1220 	area->free_page_count += pageCount;
1221 	heap->total_free_pages += pageCount;
1222 
1223 	if (area->free_page_count == pageCount) {
1224 		// we need to add ourselfs to the area list of the heap
1225 		area->prev = NULL;
1226 		area->next = heap->areas;
1227 		if (area->next)
1228 			area->next->prev = area;
1229 		heap->areas = area;
1230 	} else {
1231 		// we might need to move back in the area list
1232 		if (area->next && area->next->free_page_count < area->free_page_count) {
1233 			// move ourselfs so the list stays ordered
1234 			heap_area *insert = area->next;
1235 			while (insert->next
1236 				&& insert->next->free_page_count < area->free_page_count)
1237 				insert = insert->next;
1238 
1239 			if (area->prev)
1240 				area->prev->next = area->next;
1241 			if (area->next)
1242 				area->next->prev = area->prev;
1243 			if (heap->areas == area)
1244 				heap->areas = area->next;
1245 
1246 			area->prev = insert;
1247 			area->next = insert->next;
1248 			if (area->next)
1249 				area->next->prev = area;
1250 			insert->next = area;
1251 		}
1252 	}
1253 
1254 	if (area->free_page_count == area->page_count && area->area >= 0)
1255 		heap->empty_areas++;
1256 }
1257 
1258 
1259 static inline void
1260 heap_free_pages_removed(heap_allocator *heap, heap_area *area, uint32 pageCount)
1261 {
1262 	if (area->free_page_count == area->page_count && area->area >= 0) {
1263 		// this area was completely empty
1264 		heap->empty_areas--;
1265 	}
1266 
1267 	area->free_page_count -= pageCount;
1268 	heap->total_free_pages -= pageCount;
1269 
1270 	if (area->free_page_count == 0) {
1271 		// the area is now full so we remove it from the area list
1272 		if (area->prev)
1273 			area->prev->next = area->next;
1274 		if (area->next)
1275 			area->next->prev = area->prev;
1276 		if (heap->areas == area)
1277 			heap->areas = area->next;
1278 		area->next = area->prev = NULL;
1279 	} else {
1280 		// we might need to move forward in the area list
1281 		if (area->prev && area->prev->free_page_count > area->free_page_count) {
1282 			// move ourselfs so the list stays ordered
1283 			heap_area *insert = area->prev;
1284 			while (insert->prev
1285 				&& insert->prev->free_page_count > area->free_page_count)
1286 				insert = insert->prev;
1287 
1288 			if (area->prev)
1289 				area->prev->next = area->next;
1290 			if (area->next)
1291 				area->next->prev = area->prev;
1292 
1293 			area->prev = insert->prev;
1294 			area->next = insert;
1295 			if (area->prev)
1296 				area->prev->next = area;
1297 			if (heap->areas == insert)
1298 				heap->areas = area;
1299 			insert->prev = area;
1300 		}
1301 	}
1302 }
1303 
1304 
1305 static inline void
1306 heap_link_page(heap_page *page, heap_page **list)
1307 {
1308 	page->prev = NULL;
1309 	page->next = *list;
1310 	if (page->next)
1311 		page->next->prev = page;
1312 	*list = page;
1313 }
1314 
1315 
1316 static inline void
1317 heap_unlink_page(heap_page *page, heap_page **list)
1318 {
1319 	if (page->prev)
1320 		page->prev->next = page->next;
1321 	if (page->next)
1322 		page->next->prev = page->prev;
1323 	if (list && *list == page) {
1324 		*list = page->next;
1325 		if (page->next)
1326 			page->next->prev = NULL;
1327 	}
1328 }
1329 
1330 
1331 static heap_page *
1332 heap_allocate_contiguous_pages(heap_allocator *heap, uint32 pageCount,
1333 	size_t alignment)
1334 {
1335 	MutexLocker pageLocker(heap->page_lock);
1336 	heap_area *area = heap->areas;
1337 	while (area) {
1338 		if (area->free_page_count < pageCount) {
1339 			area = area->next;
1340 			continue;
1341 		}
1342 
1343 		uint32 step = 1;
1344 		uint32 firstValid = 0;
1345 		const uint32 lastValid = area->page_count - pageCount + 1;
1346 
1347 		if (alignment > heap->page_size) {
1348 			firstValid = (ROUNDUP(area->base, alignment) - area->base)
1349 				/ heap->page_size;
1350 			step = alignment / heap->page_size;
1351 		}
1352 
1353 		int32 first = -1;
1354 		for (uint32 i = firstValid; i < lastValid; i += step) {
1355 			if (area->page_table[i].in_use)
1356 				continue;
1357 
1358 			first = i;
1359 
1360 			for (uint32 j = 1; j < pageCount; j++) {
1361 				if (area->page_table[i + j].in_use) {
1362 					first = -1;
1363 					i += j / step * step;
1364 					break;
1365 				}
1366 			}
1367 
1368 			if (first >= 0)
1369 				break;
1370 		}
1371 
1372 		if (first < 0) {
1373 			area = area->next;
1374 			continue;
1375 		}
1376 
1377 		for (uint32 i = first; i < first + pageCount; i++) {
1378 			heap_page *page = &area->page_table[i];
1379 			page->in_use = 1;
1380 			page->bin_index = heap->bin_count;
1381 
1382 			heap_unlink_page(page, &area->free_pages);
1383 
1384 			page->next = page->prev = NULL;
1385 			page->free_list = NULL;
1386 			page->allocation_id = (uint16)first;
1387 		}
1388 
1389 		heap_free_pages_removed(heap, area, pageCount);
1390 		return &area->page_table[first];
1391 	}
1392 
1393 	return NULL;
1394 }
1395 
1396 
1397 #if KERNEL_HEAP_LEAK_CHECK
1398 static void
1399 heap_add_leak_check_info(heap_allocator *heap, addr_t address, size_t allocated,
1400 	size_t size)
1401 {
1402 	heap_leak_check_info *info = (heap_leak_check_info *)(address + allocated
1403 		- sizeof(heap_leak_check_info));
1404 	info->size = size - sizeof(heap_leak_check_info);
1405 	info->thread = (gKernelStartup ? 0 : thread_get_current_thread_id());
1406 	info->team = (gKernelStartup ? 0 : team_get_current_team_id());
1407 	info->caller = heap->get_caller();
1408 }
1409 #endif
1410 
1411 
1412 static void *
1413 heap_raw_alloc(heap_allocator *heap, size_t size, size_t alignment)
1414 {
1415 	TRACE(("heap %p: allocate %lu bytes from raw pages with alignment %lu\n",
1416 		heap, size, alignment));
1417 
1418 	uint32 pageCount = (size + heap->page_size - 1) / heap->page_size;
1419 	heap_page *firstPage = heap_allocate_contiguous_pages(heap, pageCount,
1420 		alignment);
1421 	if (firstPage == NULL) {
1422 		TRACE(("heap %p: found no contiguous pages to allocate %ld bytes\n",
1423 			heap, size));
1424 		return NULL;
1425 	}
1426 
1427 	addr_t address = firstPage->area->base + firstPage->index * heap->page_size;
1428 #if KERNEL_HEAP_LEAK_CHECK
1429 	heap_add_leak_check_info(heap, address, pageCount * heap->page_size, size);
1430 #endif
1431 	return (void *)address;
1432 }
1433 
1434 
1435 static void *
1436 heap_allocate_from_bin(heap_allocator *heap, uint32 binIndex, size_t size)
1437 {
1438 	heap_bin *bin = &heap->bins[binIndex];
1439 	TRACE(("heap %p: allocate %lu bytes from bin %lu with element_size %lu\n",
1440 		heap, size, binIndex, bin->element_size));
1441 
1442 	MutexLocker binLocker(bin->lock);
1443 	heap_page *page = bin->page_list;
1444 	if (page == NULL) {
1445 		MutexLocker pageLocker(heap->page_lock);
1446 		heap_area *area = heap->areas;
1447 		if (area == NULL) {
1448 			TRACE(("heap %p: no free pages to allocate %lu bytes\n", heap,
1449 				size));
1450 			return NULL;
1451 		}
1452 
1453 		// by design there are only areas in the list that still have
1454 		// free pages available
1455 		page = area->free_pages;
1456 		area->free_pages = page->next;
1457 		if (page->next)
1458 			page->next->prev = NULL;
1459 
1460 		heap_free_pages_removed(heap, area, 1);
1461 
1462 		if (page->in_use)
1463 			panic("got an in use page %p from the free pages list\n", page);
1464 		page->in_use = 1;
1465 
1466 		pageLocker.Unlock();
1467 
1468 		page->bin_index = binIndex;
1469 		page->free_count = bin->max_free_count;
1470 		page->empty_index = 0;
1471 		page->free_list = NULL;
1472 		page->next = page->prev = NULL;
1473 		bin->page_list = page;
1474 	}
1475 
1476 	// we have a page where we have a free slot
1477 	void *address = NULL;
1478 	if (page->free_list) {
1479 		// there's a previously freed entry we can use
1480 		address = page->free_list;
1481 		page->free_list = (addr_t *)*page->free_list;
1482 	} else {
1483 		// the page hasn't been fully allocated so use the next empty_index
1484 		address = (void *)(page->area->base + page->index * heap->page_size
1485 			+ page->empty_index * bin->element_size);
1486 		page->empty_index++;
1487 	}
1488 
1489 	page->free_count--;
1490 	if (page->free_count == 0) {
1491 		// the page is now full so we remove it from the page_list
1492 		bin->page_list = page->next;
1493 		if (page->next)
1494 			page->next->prev = NULL;
1495 		page->next = page->prev = NULL;
1496 	}
1497 
1498 #if KERNEL_HEAP_LEAK_CHECK
1499 	binLocker.Unlock();
1500 	heap_add_leak_check_info(heap, (addr_t)address, bin->element_size, size);
1501 #endif
1502 	return address;
1503 }
1504 
1505 
1506 static bool
1507 is_valid_alignment(size_t number)
1508 {
1509 	// this cryptic line accepts zero and all powers of two
1510 	return ((~number + 1) | ((number << 1) - 1)) == ~0UL;
1511 }
1512 
1513 
1514 inline bool
1515 heap_should_grow(heap_allocator *heap)
1516 {
1517 	// suggest growing if there is less than 20% of a grow size available
1518 	return heap->total_free_pages * heap->page_size < HEAP_GROW_SIZE / 5;
1519 }
1520 
1521 
1522 void *
1523 heap_memalign(heap_allocator *heap, size_t alignment, size_t size)
1524 {
1525 	TRACE(("memalign(alignment = %lu, size = %lu)\n", alignment, size));
1526 
1527 #if DEBUG
1528 	if (!is_valid_alignment(alignment))
1529 		panic("memalign() with an alignment which is not a power of 2\n");
1530 #endif
1531 
1532 #if KERNEL_HEAP_LEAK_CHECK
1533 	size += sizeof(heap_leak_check_info);
1534 #endif
1535 
1536 	void *address = NULL;
1537 	if (alignment < B_PAGE_SIZE) {
1538 		if (alignment != 0) {
1539 			// TODO: The alignment is done by ensuring that the element size
1540 			// of the target bin is aligned with the requested alignment. This
1541 			// has the problem that it wastes space because a better (smaller)
1542 			// bin could possibly be selected. We should pick the best bin and
1543 			// check if there is an aligned block in the free list or if a new
1544 			// (page aligned) page has to be allocated anyway.
1545 			size = ROUNDUP(size, alignment);
1546 			for (uint32 i = 0; i < heap->bin_count; i++) {
1547 				if (size <= heap->bins[i].element_size
1548 					&& is_valid_alignment(heap->bins[i].element_size)) {
1549 					address = heap_allocate_from_bin(heap, i, size);
1550 					break;
1551 				}
1552 			}
1553 		} else {
1554 			for (uint32 i = 0; i < heap->bin_count; i++) {
1555 				if (size <= heap->bins[i].element_size) {
1556 					address = heap_allocate_from_bin(heap, i, size);
1557 					break;
1558 				}
1559 			}
1560 		}
1561 	}
1562 
1563 	if (address == NULL)
1564 		address = heap_raw_alloc(heap, size, alignment);
1565 
1566 #if KERNEL_HEAP_LEAK_CHECK
1567 	size -= sizeof(heap_leak_check_info);
1568 #endif
1569 
1570 	TRACE(("memalign(): asked to allocate %lu bytes, returning pointer %p\n",
1571 		size, address));
1572 
1573 	T(Allocate((addr_t)address, size));
1574 	if (address == NULL)
1575 		return address;
1576 
1577 #if PARANOID_KERNEL_MALLOC
1578 	memset(address, 0xcc, size);
1579 #endif
1580 
1581 #if PARANOID_KERNEL_FREE
1582 	// make sure 0xdeadbeef is cleared if we do not overwrite the memory
1583 	// and the user does not clear it
1584 	if (((uint32 *)address)[1] == 0xdeadbeef)
1585 		((uint32 *)address)[1] = 0xcccccccc;
1586 #endif
1587 
1588 	return address;
1589 }
1590 
1591 
1592 status_t
1593 heap_free(heap_allocator *heap, void *address)
1594 {
1595 	if (address == NULL)
1596 		return B_OK;
1597 
1598 	ReadLocker areaReadLocker(heap->area_lock);
1599 	heap_area *area = heap->all_areas;
1600 	while (area) {
1601 		// since the all_areas list is ordered by base with the biggest
1602 		// base at the top, we need only find the first area with a base
1603 		// smaller than our address to become our only candidate for freeing
1604 		if (area->base <= (addr_t)address) {
1605 			if ((addr_t)address >= area->base + area->size) {
1606 				// none of the other areas can contain the address as the list
1607 				// is ordered
1608 				return B_ENTRY_NOT_FOUND;
1609 			}
1610 
1611 			// this area contains the allocation, we're done searching
1612 			break;
1613 		}
1614 
1615 		area = area->all_next;
1616 	}
1617 
1618 	if (area == NULL) {
1619 		// this address does not belong to us
1620 		return B_ENTRY_NOT_FOUND;
1621 	}
1622 
1623 	TRACE(("free(): asked to free pointer %p\n", address));
1624 
1625 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1626 		/ heap->page_size];
1627 
1628 	TRACE(("free(): page %p: bin_index %d, free_count %d\n", page,
1629 		page->bin_index, page->free_count));
1630 
1631 	if (page->bin_index > heap->bin_count) {
1632 		panic("free(): page %p: invalid bin_index %d\n", page, page->bin_index);
1633 		return B_ERROR;
1634 	}
1635 
1636 	if (page->bin_index < heap->bin_count) {
1637 		// small allocation
1638 		heap_bin *bin = &heap->bins[page->bin_index];
1639 
1640 #if PARANOID_KERNEL_FREE
1641 		if (((uint32 *)address)[1] == 0xdeadbeef) {
1642 			// This block looks like it was freed already, walk the free list
1643 			// on this page to make sure this address doesn't exist.
1644 			MutexLocker binLocker(bin->lock);
1645 			for (addr_t *temp = page->free_list; temp != NULL;
1646 					temp = (addr_t *)*temp) {
1647 				if (temp == address) {
1648 					panic("free(): address %p already exists in page free "
1649 						"list\n", address);
1650 					return B_ERROR;
1651 				}
1652 			}
1653 		}
1654 
1655 		// the first 4 bytes are overwritten with the next free list pointer
1656 		// later
1657 		uint32 *dead = (uint32 *)address;
1658 		for (uint32 i = 1; i < bin->element_size / sizeof(uint32); i++)
1659 			dead[i] = 0xdeadbeef;
1660 #endif
1661 
1662 		MutexLocker binLocker(bin->lock);
1663 		if (((addr_t)address - area->base - page->index
1664 			* heap->page_size) % bin->element_size != 0) {
1665 			panic("free(): passed invalid pointer %p supposed to be in bin for "
1666 				"element size %" B_PRIu32 "\n", address, bin->element_size);
1667 			return B_ERROR;
1668 		}
1669 
1670 		// add the address to the page free list
1671 		*(addr_t *)address = (addr_t)page->free_list;
1672 		page->free_list = (addr_t *)address;
1673 		page->free_count++;
1674 
1675 		if (page->free_count == bin->max_free_count) {
1676 			// we are now empty, remove the page from the bin list
1677 			MutexLocker pageLocker(heap->page_lock);
1678 			heap_unlink_page(page, &bin->page_list);
1679 			page->in_use = 0;
1680 			heap_link_page(page, &area->free_pages);
1681 			heap_free_pages_added(heap, area, 1);
1682 		} else if (page->free_count == 1) {
1683 			// we need to add ourselfs to the page list of the bin
1684 			heap_link_page(page, &bin->page_list);
1685 		} else {
1686 			// we might need to move back in the free pages list
1687 			if (page->next && page->next->free_count < page->free_count) {
1688 				// move ourselfs so the list stays ordered
1689 				heap_page *insert = page->next;
1690 				while (insert->next
1691 					&& insert->next->free_count < page->free_count)
1692 					insert = insert->next;
1693 
1694 				heap_unlink_page(page, &bin->page_list);
1695 
1696 				page->prev = insert;
1697 				page->next = insert->next;
1698 				if (page->next)
1699 					page->next->prev = page;
1700 				insert->next = page;
1701 			}
1702 		}
1703 	} else {
1704 		// large allocation, just return the pages to the page free list
1705 		uint32 allocationID = page->allocation_id;
1706 		uint32 maxPages = area->page_count - page->index;
1707 		uint32 pageCount = 0;
1708 
1709 		MutexLocker pageLocker(heap->page_lock);
1710 		for (uint32 i = 0; i < maxPages; i++) {
1711 			// loop until we find the end of this allocation
1712 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1713 				|| page[i].allocation_id != allocationID)
1714 				break;
1715 
1716 			// this page still belongs to the same allocation
1717 			page[i].in_use = 0;
1718 			page[i].allocation_id = 0;
1719 
1720 			// return it to the free list
1721 			heap_link_page(&page[i], &area->free_pages);
1722 			pageCount++;
1723 		}
1724 
1725 		heap_free_pages_added(heap, area, pageCount);
1726 	}
1727 
1728 	T(Free((addr_t)address));
1729 	areaReadLocker.Unlock();
1730 
1731 	if (heap->empty_areas > 1) {
1732 		WriteLocker areaWriteLocker(heap->area_lock);
1733 		MutexLocker pageLocker(heap->page_lock);
1734 
1735 		area_id areasToDelete[heap->empty_areas - 1];
1736 		int32 areasToDeleteIndex = 0;
1737 
1738 		area = heap->areas;
1739 		while (area != NULL && heap->empty_areas > 1) {
1740 			heap_area *next = area->next;
1741 			if (area->area >= 0
1742 				&& area->free_page_count == area->page_count
1743 				&& heap_remove_area(heap, area) == B_OK) {
1744 				areasToDelete[areasToDeleteIndex++] = area->area;
1745 				heap->empty_areas--;
1746 			}
1747 
1748 			area = next;
1749 		}
1750 
1751 		pageLocker.Unlock();
1752 		areaWriteLocker.Unlock();
1753 
1754 		for (int32 i = 0; i < areasToDeleteIndex; i++)
1755 			delete_area(areasToDelete[i]);
1756 	}
1757 
1758 	return B_OK;
1759 }
1760 
1761 
1762 #if KERNEL_HEAP_LEAK_CHECK
1763 extern "C" void
1764 heap_set_get_caller(heap_allocator* heap, addr_t (*getCaller)())
1765 {
1766 	heap->get_caller = getCaller;
1767 }
1768 #endif
1769 
1770 
1771 #if USE_DEBUG_HEAP_FOR_MALLOC
1772 
1773 
1774 static status_t
1775 heap_realloc(heap_allocator *heap, void *address, void **newAddress,
1776 	size_t newSize, uint32 flags)
1777 {
1778 	ReadLocker areaReadLocker(heap->area_lock);
1779 	heap_area *area = heap->all_areas;
1780 	while (area) {
1781 		// since the all_areas list is ordered by base with the biggest
1782 		// base at the top, we need only find the first area with a base
1783 		// smaller than our address to become our only candidate for
1784 		// reallocating
1785 		if (area->base <= (addr_t)address) {
1786 			if ((addr_t)address >= area->base + area->size) {
1787 				// none of the other areas can contain the address as the list
1788 				// is ordered
1789 				return B_ENTRY_NOT_FOUND;
1790 			}
1791 
1792 			// this area contains the allocation, we're done searching
1793 			break;
1794 		}
1795 
1796 		area = area->all_next;
1797 	}
1798 
1799 	if (area == NULL) {
1800 		// this address does not belong to us
1801 		return B_ENTRY_NOT_FOUND;
1802 	}
1803 
1804 	TRACE(("realloc(address = %p, newSize = %lu)\n", address, newSize));
1805 
1806 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1807 		/ heap->page_size];
1808 	if (page->bin_index > heap->bin_count) {
1809 		panic("realloc(): page %p: invalid bin_index %d\n", page,
1810 			page->bin_index);
1811 		return B_ERROR;
1812 	}
1813 
1814 	// find out the size of the old allocation first
1815 	size_t minSize = 0;
1816 	size_t maxSize = 0;
1817 	if (page->bin_index < heap->bin_count) {
1818 		// this was a small allocation
1819 		heap_bin *bin = &heap->bins[page->bin_index];
1820 		maxSize = bin->element_size;
1821 		if (page->bin_index > 0)
1822 			minSize = heap->bins[page->bin_index - 1].element_size + 1;
1823 	} else {
1824 		// this was a large allocation
1825 		uint32 allocationID = page->allocation_id;
1826 		uint32 maxPages = area->page_count - page->index;
1827 		maxSize = heap->page_size;
1828 
1829 		MutexLocker pageLocker(heap->page_lock);
1830 		for (uint32 i = 1; i < maxPages; i++) {
1831 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1832 				|| page[i].allocation_id != allocationID)
1833 				break;
1834 
1835 			minSize += heap->page_size;
1836 			maxSize += heap->page_size;
1837 		}
1838 	}
1839 
1840 	areaReadLocker.Unlock();
1841 
1842 #if KERNEL_HEAP_LEAK_CHECK
1843 	newSize += sizeof(heap_leak_check_info);
1844 #endif
1845 
1846 	// does the new allocation simply fit in the old allocation?
1847 	if (newSize > minSize && newSize <= maxSize) {
1848 #if KERNEL_HEAP_LEAK_CHECK
1849 		// update the size info (the info is at the end so stays where it is)
1850 		heap_leak_check_info *info = (heap_leak_check_info *)((addr_t)address
1851 			+ maxSize - sizeof(heap_leak_check_info));
1852 		info->size = newSize - sizeof(heap_leak_check_info);
1853 		newSize -= sizeof(heap_leak_check_info);
1854 #endif
1855 
1856 		T(Reallocate((addr_t)address, (addr_t)address, newSize));
1857 		*newAddress = address;
1858 		return B_OK;
1859 	}
1860 
1861 #if KERNEL_HEAP_LEAK_CHECK
1862 	// new leak check info will be created with the malloc below
1863 	newSize -= sizeof(heap_leak_check_info);
1864 #endif
1865 
1866 	// if not, allocate a new chunk of memory
1867 	*newAddress = malloc_etc(newSize, flags);
1868 	T(Reallocate((addr_t)address, (addr_t)*newAddress, newSize));
1869 	if (*newAddress == NULL) {
1870 		// we tried but it didn't work out, but still the operation is done
1871 		return B_OK;
1872 	}
1873 
1874 	// copy the old data and free the old allocation
1875 	memcpy(*newAddress, address, min_c(maxSize, newSize));
1876 	heap_free(heap, address);
1877 	return B_OK;
1878 }
1879 
1880 
1881 inline uint32
1882 heap_index_for(size_t size, int32 cpu)
1883 {
1884 #if KERNEL_HEAP_LEAK_CHECK
1885 	// take the extra info size into account
1886 	size += sizeof(heap_leak_check_info_s);
1887 #endif
1888 
1889 	uint32 index = 0;
1890 	for (; index < HEAP_CLASS_COUNT - 1; index++) {
1891 		if (size <= sHeapClasses[index].max_allocation_size)
1892 			break;
1893 	}
1894 
1895 	return (index + cpu * HEAP_CLASS_COUNT) % sHeapCount;
1896 }
1897 
1898 
1899 static void *
1900 memalign_nogrow(size_t alignment, size_t size)
1901 {
1902 	// use dedicated memory in the grow thread by default
1903 	if (thread_get_current_thread_id() == sHeapGrowThread) {
1904 		void *result = heap_memalign(sGrowHeap, alignment, size);
1905 		if (!sAddGrowHeap && heap_should_grow(sGrowHeap)) {
1906 			// hopefully the heap grower will manage to create a new heap
1907 			// before running out of private memory...
1908 			dprintf("heap: requesting new grow heap\n");
1909 			sAddGrowHeap = true;
1910 			release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
1911 		}
1912 
1913 		if (result != NULL)
1914 			return result;
1915 	}
1916 
1917 	// try public memory, there might be something available
1918 	void *result = NULL;
1919 	int32 cpuCount = MIN(smp_get_num_cpus(),
1920 		(int32)sHeapCount / HEAP_CLASS_COUNT);
1921 	int32 cpuNumber = smp_get_current_cpu();
1922 	for (int32 i = 0; i < cpuCount; i++) {
1923 		uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount);
1924 		heap_allocator *heap = sHeaps[heapIndex];
1925 		result = heap_memalign(heap, alignment, size);
1926 		if (result != NULL)
1927 			return result;
1928 	}
1929 
1930 	// no memory available
1931 	if (thread_get_current_thread_id() == sHeapGrowThread)
1932 		panic("heap: all heaps have run out of memory while growing\n");
1933 	else
1934 		dprintf("heap: all heaps have run out of memory\n");
1935 
1936 	return NULL;
1937 }
1938 
1939 
1940 static status_t
1941 heap_create_new_heap_area(heap_allocator *heap, const char *name, size_t size)
1942 {
1943 	void *address = NULL;
1944 	area_id heapArea = create_area(name, &address,
1945 		B_ANY_KERNEL_BLOCK_ADDRESS, size, B_FULL_LOCK,
1946 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1947 	if (heapArea < B_OK) {
1948 		TRACE(("heap: couldn't allocate heap area \"%s\"\n", name));
1949 		return heapArea;
1950 	}
1951 
1952 	heap_add_area(heap, heapArea, (addr_t)address, size);
1953 #if PARANOID_HEAP_VALIDATION
1954 	heap_validate_heap(heap);
1955 #endif
1956 	return B_OK;
1957 }
1958 
1959 
1960 static int32
1961 heap_grow_thread(void *)
1962 {
1963 	while (true) {
1964 		// wait for a request to grow the heap list
1965 		if (acquire_sem(sHeapGrowSem) < B_OK)
1966 			continue;
1967 
1968 		if (sAddGrowHeap) {
1969 			// the grow heap is going to run full soon, try to allocate a new
1970 			// one to make some room.
1971 			TRACE(("heap_grower: grow heaps will run out of memory soon\n"));
1972 			if (heap_create_new_heap_area(sGrowHeap, "additional grow heap",
1973 					HEAP_DEDICATED_GROW_SIZE) != B_OK)
1974 				dprintf("heap_grower: failed to create new grow heap area\n");
1975 		}
1976 
1977 		for (uint32 i = 0; i < sHeapCount; i++) {
1978 			heap_allocator *heap = sHeaps[i];
1979 			if (sLastGrowRequest[i] > sLastHandledGrowRequest[i]
1980 				|| heap_should_grow(heap)) {
1981 				// grow this heap if it is nearly full or if a grow was
1982 				// explicitly requested for this heap (happens when a large
1983 				// allocation cannot be fulfilled due to lack of contiguous
1984 				// pages)
1985 				if (heap_create_new_heap_area(heap, "additional heap",
1986 						HEAP_GROW_SIZE) != B_OK)
1987 					dprintf("heap_grower: failed to create new heap area\n");
1988 				sLastHandledGrowRequest[i] = sLastGrowRequest[i];
1989 			}
1990 		}
1991 
1992 		// notify anyone waiting for this request
1993 		release_sem_etc(sHeapGrownNotify, -1, B_RELEASE_ALL);
1994 	}
1995 
1996 	return 0;
1997 }
1998 
1999 
2000 #endif	// USE_DEBUG_HEAP_FOR_MALLOC
2001 
2002 
2003 static void
2004 deferred_deleter(void *arg, int iteration)
2005 {
2006 	// move entries and deletables to on-stack lists
2007 	InterruptsSpinLocker locker(sDeferredFreeListLock);
2008 	if (sDeferredFreeList.IsEmpty() && sDeferredDeletableList.IsEmpty())
2009 		return;
2010 
2011 	DeferredFreeList entries;
2012 	entries.MoveFrom(&sDeferredFreeList);
2013 
2014 	DeferredDeletableList deletables;
2015 	deletables.MoveFrom(&sDeferredDeletableList);
2016 
2017 	locker.Unlock();
2018 
2019 	// free the entries
2020 	while (DeferredFreeListEntry* entry = entries.RemoveHead())
2021 		free(entry);
2022 
2023 	// delete the deletables
2024 	while (DeferredDeletable* deletable = deletables.RemoveHead())
2025 		delete deletable;
2026 }
2027 
2028 
2029 //	#pragma mark -
2030 
2031 
2032 #if USE_DEBUG_HEAP_FOR_MALLOC
2033 
2034 
2035 status_t
2036 heap_init(addr_t base, size_t size)
2037 {
2038 	for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) {
2039 		size_t partSize = size * sHeapClasses[i].initial_percentage / 100;
2040 		sHeaps[i] = heap_create_allocator(sHeapClasses[i].name, base, partSize,
2041 			&sHeapClasses[i], false);
2042 		sLastGrowRequest[i] = sLastHandledGrowRequest[i] = 0;
2043 		base += partSize;
2044 		sHeapCount++;
2045 	}
2046 
2047 	// set up some debug commands
2048 	add_debugger_command_etc("heap", &dump_heap_list,
2049 		"Dump infos about the kernel heap(s)",
2050 		"[(\"grow\" | \"stats\" | <heap>)]\n"
2051 		"Dump infos about the kernel heap(s). If \"grow\" is specified, only\n"
2052 		"infos about the dedicated grow heap are printed. If \"stats\" is\n"
2053 		"given as the argument, currently only the heap count is printed.\n"
2054 		"If <heap> is given, it is interpreted as the address of the heap to\n"
2055 		"print infos about.\n", 0);
2056 #if !KERNEL_HEAP_LEAK_CHECK
2057 	add_debugger_command_etc("allocations", &dump_allocations,
2058 		"Dump current heap allocations",
2059 		"[\"stats\"] [<heap>]\n"
2060 		"If no parameters are given, all current alloactions are dumped.\n"
2061 		"If the optional argument \"stats\" is specified, only the allocation\n"
2062 		"counts and no individual allocations are printed\n"
2063 		"If a specific heap address is given, only allocations of this\n"
2064 		"allocator are dumped\n", 0);
2065 #else // !KERNEL_HEAP_LEAK_CHECK
2066 	add_debugger_command_etc("allocations", &dump_allocations,
2067 		"Dump current heap allocations",
2068 		"[(\"team\" | \"thread\") <id>] [\"caller\" <address>] [\"address\" <address>] [\"stats\"]\n"
2069 		"If no parameters are given, all current alloactions are dumped.\n"
2070 		"If \"team\", \"thread\", \"caller\", and/or \"address\" is specified as the first\n"
2071 		"argument, only allocations matching the team ID, thread ID, caller\n"
2072 		"address or allocated address given in the second argument are printed.\n"
2073 		"If the optional argument \"stats\" is specified, only the allocation\n"
2074 		"counts and no individual allocations are printed.\n", 0);
2075 	add_debugger_command_etc("allocations_per_caller",
2076 		&dump_allocations_per_caller,
2077 		"Dump current heap allocations summed up per caller",
2078 		"[ \"-c\" ] [ -h <heap> ]\n"
2079 		"The current allocations will by summed up by caller (their count and\n"
2080 		"size) printed in decreasing order by size or, if \"-c\" is\n"
2081 		"specified, by allocation count. If given <heap> specifies the\n"
2082 		"address of the heap for which to print the allocations.\n", 0);
2083 #endif // KERNEL_HEAP_LEAK_CHECK
2084 	return B_OK;
2085 }
2086 
2087 
2088 status_t
2089 heap_init_post_area()
2090 {
2091 	void *address = NULL;
2092 	area_id growHeapArea = create_area("dedicated grow heap", &address,
2093 		B_ANY_KERNEL_BLOCK_ADDRESS, HEAP_DEDICATED_GROW_SIZE, B_FULL_LOCK,
2094 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2095 	if (growHeapArea < 0) {
2096 		panic("heap_init_post_area(): couldn't allocate dedicate grow heap "
2097 			"area");
2098 		return growHeapArea;
2099 	}
2100 
2101 	sGrowHeap = heap_create_allocator("grow", (addr_t)address,
2102 		HEAP_DEDICATED_GROW_SIZE, &sHeapClasses[0], false);
2103 	if (sGrowHeap == NULL) {
2104 		panic("heap_init_post_area(): failed to create dedicated grow heap\n");
2105 		return B_ERROR;
2106 	}
2107 
2108 	// create the VIP heap
2109 	static const heap_class heapClass = {
2110 		"VIP I/O",					/* name */
2111 		100,						/* initial percentage */
2112 		B_PAGE_SIZE / 8,			/* max allocation size */
2113 		B_PAGE_SIZE,				/* page size */
2114 		8,							/* min bin size */
2115 		4,							/* bin alignment */
2116 		8,							/* min count per page */
2117 		16							/* max waste per page */
2118 	};
2119 
2120 	area_id vipHeapArea = create_area("VIP heap", &address,
2121 		B_ANY_KERNEL_ADDRESS, VIP_HEAP_SIZE, B_FULL_LOCK,
2122 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2123 	if (vipHeapArea < 0) {
2124 		panic("heap_init_post_area(): couldn't allocate VIP heap area");
2125 		return B_ERROR;
2126 	}
2127 
2128 	sVIPHeap = heap_create_allocator("VIP heap", (addr_t)address,
2129 		VIP_HEAP_SIZE, &heapClass, false);
2130 	if (sVIPHeap == NULL) {
2131 		panic("heap_init_post_area(): failed to create VIP heap\n");
2132 		return B_ERROR;
2133 	}
2134 
2135 	dprintf("heap_init_post_area(): created VIP heap: %p\n", sVIPHeap);
2136 
2137 	return B_OK;
2138 }
2139 
2140 
2141 status_t
2142 heap_init_post_sem()
2143 {
2144 	sHeapGrowSem = create_sem(0, "heap_grow_sem");
2145 	if (sHeapGrowSem < 0) {
2146 		panic("heap_init_post_sem(): failed to create heap grow sem\n");
2147 		return B_ERROR;
2148 	}
2149 
2150 	sHeapGrownNotify = create_sem(0, "heap_grown_notify");
2151 	if (sHeapGrownNotify < 0) {
2152 		panic("heap_init_post_sem(): failed to create heap grown notify sem\n");
2153 		return B_ERROR;
2154 	}
2155 
2156 	return B_OK;
2157 }
2158 
2159 
2160 #endif	// USE_DEBUG_HEAP_FOR_MALLOC
2161 
2162 
2163 status_t
2164 heap_init_post_thread()
2165 {
2166 #if	USE_DEBUG_HEAP_FOR_MALLOC
2167 	sHeapGrowThread = spawn_kernel_thread(heap_grow_thread, "heap grower",
2168 		B_URGENT_PRIORITY, NULL);
2169 	if (sHeapGrowThread < 0) {
2170 		panic("heap_init_post_thread(): cannot create heap grow thread\n");
2171 		return sHeapGrowThread;
2172 	}
2173 
2174 	// create per-cpu heaps if there's enough memory
2175 	int32 heapCount = MIN(smp_get_num_cpus(),
2176 		(int32)vm_page_num_pages() / 60 / 1024);
2177 	for (int32 i = 1; i < heapCount; i++) {
2178 		addr_t base = 0;
2179 		size_t size = HEAP_GROW_SIZE * HEAP_CLASS_COUNT;
2180 		area_id perCPUHeapArea = create_area("per cpu initial heap",
2181 			(void **)&base, B_ANY_KERNEL_ADDRESS, size, B_FULL_LOCK,
2182 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2183 		if (perCPUHeapArea < 0)
2184 			break;
2185 
2186 		for (uint32 j = 0; j < HEAP_CLASS_COUNT; j++) {
2187 			int32 heapIndex = i * HEAP_CLASS_COUNT + j;
2188 			size_t partSize = size * sHeapClasses[j].initial_percentage / 100;
2189 			sHeaps[heapIndex] = heap_create_allocator(sHeapClasses[j].name,
2190 				base, partSize, &sHeapClasses[j], false);
2191 			sLastGrowRequest[heapIndex] = 0;
2192 			sLastHandledGrowRequest[heapIndex] = 0;
2193 			base += partSize;
2194 			sHeapCount++;
2195 		}
2196 	}
2197 
2198 	resume_thread(sHeapGrowThread);
2199 
2200 #else	// USE_DEBUG_HEAP_FOR_MALLOC
2201 
2202 	// set up some debug commands
2203 	add_debugger_command_etc("heap", &dump_heap_list,
2204 		"Dump infos about a specific heap",
2205 		"[\"stats\"] <heap>\n"
2206 		"Dump infos about the specified kernel heap. If \"stats\" is given\n"
2207 		"as the argument, currently only the heap count is printed.\n", 0);
2208 #if !KERNEL_HEAP_LEAK_CHECK
2209 	add_debugger_command_etc("heap_allocations", &dump_allocations,
2210 		"Dump current heap allocations",
2211 		"[\"stats\"] <heap>\n"
2212 		"If the optional argument \"stats\" is specified, only the allocation\n"
2213 		"counts and no individual allocations are printed.\n", 0);
2214 #endif	// KERNEL_HEAP_LEAK_CHECK
2215 #endif	// !USE_DEBUG_HEAP_FOR_MALLOC
2216 
2217 	// run the deferred deleter roughly once a second
2218 	if (register_kernel_daemon(deferred_deleter, NULL, 10) != B_OK)
2219 		panic("heap_init_post_thread(): failed to init deferred deleter");
2220 
2221 	return B_OK;
2222 }
2223 
2224 
2225 //	#pragma mark - Public API
2226 
2227 
2228 #if USE_DEBUG_HEAP_FOR_MALLOC
2229 
2230 
2231 void *
2232 memalign(size_t alignment, size_t size)
2233 {
2234 	if (!gKernelStartup && !are_interrupts_enabled()) {
2235 		panic("memalign(): called with interrupts disabled\n");
2236 		return NULL;
2237 	}
2238 
2239 	if (!gKernelStartup && size > HEAP_AREA_USE_THRESHOLD) {
2240 		// don't even attempt such a huge allocation - use areas instead
2241 		size_t areaSize = ROUNDUP(size + sizeof(area_allocation_info)
2242 			+ alignment, B_PAGE_SIZE);
2243 		if (areaSize < size) {
2244 			// the size overflowed
2245 			return NULL;
2246 		}
2247 
2248 		void *address = NULL;
2249 		area_id allocationArea = create_area("memalign area", &address,
2250 			B_ANY_KERNEL_BLOCK_ADDRESS, areaSize, B_FULL_LOCK,
2251 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2252 		if (allocationArea < B_OK) {
2253 			dprintf("heap: failed to create area for huge allocation\n");
2254 			return NULL;
2255 		}
2256 
2257 		area_allocation_info *info = (area_allocation_info *)address;
2258 		info->magic = kAreaAllocationMagic;
2259 		info->area = allocationArea;
2260 		info->base = address;
2261 		info->size = areaSize;
2262 		info->allocation_size = size;
2263 		info->allocation_alignment = alignment;
2264 
2265 		address = (void *)((addr_t)address + sizeof(area_allocation_info));
2266 		if (alignment != 0) {
2267 			address = (void *)ROUNDUP((addr_t)address, alignment);
2268 			ASSERT((addr_t)address % alignment == 0);
2269 			ASSERT((addr_t)address + size - 1 < (addr_t)info + areaSize - 1);
2270 		}
2271 
2272 		TRACE(("heap: allocated area %ld for huge allocation of %lu bytes\n",
2273 			allocationArea, size));
2274 
2275 		info->allocation_base = address;
2276 
2277 #if PARANOID_KERNEL_MALLOC
2278 		memset(address, 0xcc, size);
2279 #endif
2280 		return address;
2281 	}
2282 
2283 	void *result = NULL;
2284 	bool shouldGrow = false;
2285 	int32 cpuCount = MIN(smp_get_num_cpus(),
2286 		(int32)sHeapCount / HEAP_CLASS_COUNT);
2287 	int32 cpuNumber = smp_get_current_cpu();
2288 	for (int32 i = 0; i < cpuCount; i++) {
2289 		uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount);
2290 		heap_allocator *heap = sHeaps[heapIndex];
2291 		result = heap_memalign(heap, alignment, size);
2292 		if (result != NULL) {
2293 			shouldGrow = heap_should_grow(heap);
2294 			break;
2295 		}
2296 
2297 #if PARANOID_HEAP_VALIDATION
2298 		heap_validate_heap(heap);
2299 #endif
2300 	}
2301 
2302 	if (result == NULL) {
2303 		// request an urgent grow and wait - we don't do it ourselfs here to
2304 		// serialize growing through the grow thread, as otherwise multiple
2305 		// threads hitting this situation (likely when memory ran out) would
2306 		// all add areas
2307 		uint32 heapIndex = heap_index_for(size, smp_get_current_cpu());
2308 		sLastGrowRequest[heapIndex]++;
2309 		switch_sem(sHeapGrowSem, sHeapGrownNotify);
2310 
2311 		// and then try again
2312 		result = heap_memalign(sHeaps[heapIndex], alignment, size);
2313 	} else if (shouldGrow) {
2314 		// should grow sometime soon, notify the grower
2315 		release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
2316 	}
2317 
2318 	if (result == NULL)
2319 		panic("heap: kernel heap has run out of memory\n");
2320 	return result;
2321 }
2322 
2323 
2324 void *
2325 memalign_etc(size_t alignment, size_t size, uint32 flags)
2326 {
2327 	if ((flags & HEAP_PRIORITY_VIP) != 0)
2328 		return heap_memalign(sVIPHeap, alignment, size);
2329 
2330 	if ((flags & (HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE))
2331 			!= 0) {
2332 		return memalign_nogrow(alignment, size);
2333 	}
2334 
2335 	return memalign(alignment, size);
2336 }
2337 
2338 
2339 void
2340 free_etc(void *address, uint32 flags)
2341 {
2342 	if ((flags & HEAP_PRIORITY_VIP) != 0)
2343 		heap_free(sVIPHeap, address);
2344 	else
2345 		free(address);
2346 }
2347 
2348 
2349 void *
2350 malloc(size_t size)
2351 {
2352 	return memalign_etc(0, size, 0);
2353 }
2354 
2355 
2356 void
2357 free(void *address)
2358 {
2359 	if (!gKernelStartup && !are_interrupts_enabled()) {
2360 		panic("free(): called with interrupts disabled\n");
2361 		return;
2362 	}
2363 
2364 	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2365 	for (uint32 i = 0; i < sHeapCount; i++) {
2366 		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2367 		if (heap_free(heap, address) == B_OK) {
2368 #if PARANOID_HEAP_VALIDATION
2369 			heap_validate_heap(heap);
2370 #endif
2371 			return;
2372 		}
2373 	}
2374 
2375 	// maybe it was allocated from the dedicated grow heap
2376 	if (heap_free(sGrowHeap, address) == B_OK)
2377 		return;
2378 
2379 	// or maybe it was allocated from the VIP heap
2380 	if (heap_free(sVIPHeap, address) == B_OK)
2381 		return;
2382 
2383 	// or maybe it was a huge allocation using an area
2384 	area_info areaInfo;
2385 	area_id area = area_for(address);
2386 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2387 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2388 
2389 		// just make extra sure it was allocated by us
2390 		if (info->magic == kAreaAllocationMagic && info->area == area
2391 			&& info->size == areaInfo.size && info->base == areaInfo.address
2392 			&& info->allocation_size < areaInfo.size) {
2393 			delete_area(area);
2394 			TRACE(("free(): freed huge allocation by deleting area %ld\n",
2395 				area));
2396 			return;
2397 		}
2398 	}
2399 
2400 	panic("free(): free failed for address %p\n", address);
2401 }
2402 
2403 
2404 void *
2405 realloc_etc(void *address, size_t newSize, uint32 flags)
2406 {
2407 	if (!gKernelStartup && !are_interrupts_enabled()) {
2408 		panic("realloc(): called with interrupts disabled\n");
2409 		return NULL;
2410 	}
2411 
2412 	if (address == NULL)
2413 		return malloc_etc(newSize, flags);
2414 
2415 	if (newSize == 0) {
2416 		free_etc(address, flags);
2417 		return NULL;
2418 	}
2419 
2420 	void *newAddress = NULL;
2421 	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2422 	for (uint32 i = 0; i < sHeapCount; i++) {
2423 		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2424 		if (heap_realloc(heap, address, &newAddress, newSize, flags) == B_OK) {
2425 #if PARANOID_HEAP_VALIDATION
2426 			heap_validate_heap(heap);
2427 #endif
2428 			return newAddress;
2429 		}
2430 	}
2431 
2432 	// maybe it was allocated from the dedicated grow heap
2433 	if (heap_realloc(sGrowHeap, address, &newAddress, newSize, flags) == B_OK)
2434 		return newAddress;
2435 
2436 	// or maybe it was a huge allocation using an area
2437 	area_info areaInfo;
2438 	area_id area = area_for(address);
2439 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2440 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2441 
2442 		// just make extra sure it was allocated by us
2443 		if (info->magic == kAreaAllocationMagic && info->area == area
2444 			&& info->size == areaInfo.size && info->base == areaInfo.address
2445 			&& info->allocation_size < areaInfo.size) {
2446 			size_t available = info->size - ((addr_t)info->allocation_base
2447 				- (addr_t)info->base);
2448 
2449 			if (available >= newSize) {
2450 				// there is enough room available for the newSize
2451 				TRACE(("realloc(): new size %ld fits in old area %ld with %ld "
2452 					"available\n", newSize, area, available));
2453 				info->allocation_size = newSize;
2454 				return address;
2455 			}
2456 
2457 			// have to allocate/copy/free - TODO maybe resize the area instead?
2458 			newAddress = malloc(newSize);
2459 			if (newAddress == NULL) {
2460 				dprintf("realloc(): failed to allocate new block of %ld bytes\n",
2461 					newSize);
2462 				return NULL;
2463 			}
2464 
2465 			memcpy(newAddress, address, min_c(newSize, info->allocation_size));
2466 			delete_area(area);
2467 			TRACE(("realloc(): allocated new block %p for size %ld and deleted "
2468 				"old area %ld\n", newAddress, newSize, area));
2469 			return newAddress;
2470 		}
2471 	}
2472 
2473 	panic("realloc(): failed to realloc address %p to size %lu\n", address,
2474 		newSize);
2475 	return NULL;
2476 }
2477 
2478 
2479 void *
2480 realloc(void *address, size_t newSize)
2481 {
2482 	return realloc_etc(address, newSize, 0);
2483 }
2484 
2485 
2486 #endif	// USE_DEBUG_HEAP_FOR_MALLOC
2487 
2488 
2489 void *
2490 calloc(size_t numElements, size_t size)
2491 {
2492 	if (size != 0 && numElements > (((size_t)(-1)) / size))
2493 		return NULL;
2494 
2495 	void *address = malloc(numElements * size);
2496 	if (address != NULL)
2497 		memset(address, 0, numElements * size);
2498 
2499 	return address;
2500 }
2501 
2502 
2503 void *
2504 aligned_alloc(size_t alignment, size_t size)
2505 {
2506 	if ((size % alignment) != 0)
2507 		return NULL;
2508 
2509 	return memalign(alignment, size);
2510 }
2511 
2512 
2513 void
2514 deferred_free(void *block)
2515 {
2516 	if (block == NULL)
2517 		return;
2518 
2519 	DeferredFreeListEntry *entry = new(block) DeferredFreeListEntry;
2520 
2521 	InterruptsSpinLocker _(sDeferredFreeListLock);
2522 	sDeferredFreeList.Add(entry);
2523 }
2524 
2525 
2526 DeferredDeletable::~DeferredDeletable()
2527 {
2528 }
2529 
2530 
2531 void
2532 deferred_delete(DeferredDeletable *deletable)
2533 {
2534 	if (deletable == NULL)
2535 		return;
2536 
2537 	InterruptsSpinLocker _(sDeferredFreeListLock);
2538 	sDeferredDeletableList.Add(deletable);
2539 }
2540