xref: /haiku/src/system/kernel/heap.cpp (revision 62f5ba006a08b0df30631375878effaf67ae5dbc)
1 /*
2  * Copyright 2008-2010, Michael Lotz, mmlr@mlotz.ch.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <arch/debug.h>
12 #include <debug.h>
13 #include <elf.h>
14 #include <heap.h>
15 #include <int.h>
16 #include <kernel.h>
17 #include <lock.h>
18 #include <signal.h>
19 #include <string.h>
20 #include <team.h>
21 #include <thread.h>
22 #include <tracing.h>
23 #include <util/AutoLock.h>
24 #include <vm/vm.h>
25 #include <vm/vm_page.h>
26 
27 
28 //#define TRACE_HEAP
29 #ifdef TRACE_HEAP
30 #	define TRACE(x) dprintf x
31 #else
32 #	define TRACE(x) ;
33 #endif
34 
35 
36 #if KERNEL_HEAP_LEAK_CHECK
37 typedef struct heap_leak_check_info_s {
38 	addr_t		caller;
39 	size_t		size;
40 	thread_id	thread;
41 	team_id		team;
42 } heap_leak_check_info;
43 
44 struct caller_info {
45 	addr_t		caller;
46 	uint32		count;
47 	uint32		size;
48 };
49 
50 static const int32 kCallerInfoTableSize = 1024;
51 static caller_info sCallerInfoTable[kCallerInfoTableSize];
52 static int32 sCallerInfoCount = 0;
53 #endif
54 
55 
56 typedef struct heap_page_s heap_page;
57 
58 
59 typedef struct heap_area_s {
60 	area_id			area;
61 
62 	addr_t			base;
63 	size_t			size;
64 
65 	uint32			page_count;
66 	uint32			free_page_count;
67 
68 	heap_page *		free_pages;
69 	heap_page *		page_table;
70 
71 	heap_area_s *	prev;
72 	heap_area_s *	next;
73 	heap_area_s *	all_next;
74 } heap_area;
75 
76 
77 #define MAX_BIN_COUNT	31	// depends on the size of the bin_index field
78 
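// Every heap page is described by one of these entries in its area's page
// table. bin_index, free_count and in_use are packed into a single uint16:
// bin_index uses 5 bits (hence MAX_BIN_COUNT == 31) and free_count counts the
// remaining free slots of a bin page. A bin_index equal to the heap's
// bin_count marks pages used for a page-granular ("large") allocation, which
// are tied together by their allocation_id.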
79 typedef struct heap_page_s {
80 	heap_area *		area;
81 	uint16			index;
82 	uint16			bin_index : 5;
83 	uint16			free_count : 10;
84 	uint16			in_use : 1;
85 	heap_page_s *	next;
86 	heap_page_s *	prev;
87 	union {
88 		uint16			empty_index;
89 		uint16			allocation_id; // used for bin == bin_count allocations
90 	};
91 	addr_t *		free_list;
92 } heap_page;
93 
94 
95 typedef struct heap_bin_s {
96 	mutex		lock;
97 	uint32		element_size;
98 	uint16		max_free_count;
99 	heap_page *	page_list; // sorted so that the desired page is always first
100 } heap_bin;
101 
102 
103 struct heap_allocator_s {
104 	rw_lock		area_lock;
105 	mutex		page_lock;
106 
107 	const char *name;
108 	uint32		bin_count;
109 	uint32		page_size;
110 
111 	uint32		total_pages;
112 	uint32		total_free_pages;
113 	uint32		empty_areas;
114 
115 #if KERNEL_HEAP_LEAK_CHECK
116 	addr_t		(*get_caller)();
117 #endif
118 
119 	heap_bin *	bins;
120 	heap_area *	areas; // sorted so that the desired area is always first
121 	heap_area *	all_areas; // all areas including full ones
122 };
123 
124 
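// Allocations that are too large to be served from any heap get a dedicated
// area instead; this header is stored at the base of such an area (stamped
// with kAreaAllocationMagic) and records everything needed to recognize the
// allocation and delete the area again when it is freed.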
125 static const uint32 kAreaAllocationMagic = 'AAMG';
126 typedef struct area_allocation_info_s {
127 	area_id		area;
128 	void *		base;
129 	uint32		magic;
130 	size_t		size;
131 	size_t		allocation_size;
132 	size_t		allocation_alignment;
133 	void *		allocation_base;
134 } area_allocation_info;
135 
136 
137 struct DeferredFreeListEntry : SinglyLinkedListLinkImpl<DeferredFreeListEntry> {
138 };
139 
140 
141 typedef SinglyLinkedList<DeferredFreeListEntry> DeferredFreeList;
142 typedef SinglyLinkedList<DeferredDeletable> DeferredDeletableList;
143 
144 
145 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
146 
147 #define VIP_HEAP_SIZE	(1024 * 1024)
148 
149 // Heap class configuration
150 #define HEAP_CLASS_COUNT 3
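// Requests are routed to the "small", "medium" or "large" class below based
// on their size, and every class gets its own set of per-CPU heaps (see
// sHeaps[] and heap_index_for()).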
151 static const heap_class sHeapClasses[HEAP_CLASS_COUNT] = {
152 	{
153 		"small",					/* name */
154 		50,							/* initial percentage */
155 		B_PAGE_SIZE / 8,			/* max allocation size */
156 		B_PAGE_SIZE,				/* page size */
157 		8,							/* min bin size */
158 		4,							/* bin alignment */
159 		8,							/* min count per page */
160 		16							/* max waste per page */
161 	},
162 	{
163 		"medium",					/* name */
164 		30,							/* initial percentage */
165 		B_PAGE_SIZE * 2,			/* max allocation size */
166 		B_PAGE_SIZE * 8,			/* page size */
167 		B_PAGE_SIZE / 8,			/* min bin size */
168 		32,							/* bin alignment */
169 		4,							/* min count per page */
170 		64							/* max waste per page */
171 	},
172 	{
173 		"large",					/* name */
174 		20,							/* initial percentage */
175 		HEAP_AREA_USE_THRESHOLD,	/* max allocation size */
176 		B_PAGE_SIZE * 16,			/* page size */
177 		B_PAGE_SIZE * 2,			/* min bin size */
178 		128,						/* bin alignment */
179 		1,							/* min count per page */
180 		256							/* max waste per page */
181 	}
182 };
183 
184 
185 static uint32 sHeapCount;
186 static heap_allocator *sHeaps[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
187 static uint32 *sLastGrowRequest[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
188 static uint32 *sLastHandledGrowRequest[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
189 
190 static heap_allocator *sVIPHeap;
191 static heap_allocator *sGrowHeap = NULL;
192 static thread_id sHeapGrowThread = -1;
193 static sem_id sHeapGrowSem = -1;
194 static sem_id sHeapGrownNotify = -1;
195 static bool sAddGrowHeap = false;
196 
197 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
198 
199 static DeferredFreeList sDeferredFreeList;
200 static DeferredDeletableList sDeferredDeletableList;
201 static spinlock sDeferredFreeListLock;
202 
203 
204 
205 // #pragma mark - Tracing
206 
207 #if KERNEL_HEAP_TRACING
208 namespace KernelHeapTracing {
209 
210 class Allocate : public AbstractTraceEntry {
211 	public:
212 		Allocate(addr_t address, size_t size)
213 			:	fAddress(address),
214 				fSize(size)
215 		{
216 			Initialized();
217 		}
218 
219 		virtual void AddDump(TraceOutput &out)
220 		{
221 			out.Print("heap allocate: 0x%08lx (%lu bytes)", fAddress, fSize);
222 		}
223 
224 	private:
225 		addr_t	fAddress;
226 		size_t	fSize;
227 };
228 
229 
230 class Reallocate : public AbstractTraceEntry {
231 	public:
232 		Reallocate(addr_t oldAddress, addr_t newAddress, size_t newSize)
233 			:	fOldAddress(oldAddress),
234 				fNewAddress(newAddress),
235 				fNewSize(newSize)
236 		{
237 			Initialized();
238 		};
239 
240 		virtual void AddDump(TraceOutput &out)
241 		{
242 			out.Print("heap reallocate: 0x%08lx -> 0x%08lx (%lu bytes)",
243 				fOldAddress, fNewAddress, fNewSize);
244 		}
245 
246 	private:
247 		addr_t	fOldAddress;
248 		addr_t	fNewAddress;
249 		size_t	fNewSize;
250 };
251 
252 
253 class Free : public AbstractTraceEntry {
254 	public:
255 		Free(addr_t address)
256 			:	fAddress(address)
257 		{
258 			Initialized();
259 		};
260 
261 		virtual void AddDump(TraceOutput &out)
262 		{
263 			out.Print("heap free: 0x%08lx", fAddress);
264 		}
265 
266 	private:
267 		addr_t	fAddress;
268 };
269 
270 
271 } // namespace KernelHeapTracing
272 
273 #	define T(x)	if (!gKernelStartup) new(std::nothrow) KernelHeapTracing::x;
274 #else
275 #	define T(x)	;
276 #endif
277 
278 
279 // #pragma mark - Debug functions
280 
281 
282 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
283 
284 
285 #if KERNEL_HEAP_LEAK_CHECK
286 static addr_t
287 get_caller()
288 {
289 	// Find the first return address outside of the allocator code. Note that
290 	// this makes certain assumptions about how the code for the functions
291 	// ends up in the kernel object.
292 	addr_t returnAddresses[5];
293 	int32 depth = arch_debug_get_stack_trace(returnAddresses, 5, 0, 1,
294 		STACK_TRACE_KERNEL);
295 	for (int32 i = 0; i < depth; i++) {
296 		if (returnAddresses[i] < (addr_t)&get_caller
297 			|| returnAddresses[i] > (addr_t)&malloc_referenced_release) {
298 			return returnAddresses[i];
299 		}
300 	}
301 
302 	return 0;
303 }
304 #endif
305 
306 
307 static void
308 dump_page(heap_page *page)
309 {
310 	uint32 count = 0;
311 	for (addr_t *temp = page->free_list; temp != NULL; temp = (addr_t *)*temp)
312 		count++;
313 
314 	kprintf("\t\tpage %p: bin_index: %u; free_count: %u; empty_index: %u; "
315 		"free_list %p (%lu entr%s)\n", page, page->bin_index, page->free_count,
316 		page->empty_index, page->free_list, count, count == 1 ? "y" : "ies");
317 }
318 
319 
320 static void
321 dump_bin(heap_bin *bin)
322 {
323 	uint32 count = 0;
324 	for (heap_page *page = bin->page_list; page != NULL; page = page->next)
325 		count++;
326 
327 	kprintf("\telement_size: %lu; max_free_count: %u; page_list %p (%lu pages"
328 		");\n", bin->element_size, bin->max_free_count, bin->page_list, count);
329 
330 	for (heap_page *page = bin->page_list; page != NULL; page = page->next)
331 		dump_page(page);
332 }
333 
334 
335 static void
336 dump_bin_list(heap_allocator *heap)
337 {
338 	for (uint32 i = 0; i < heap->bin_count; i++)
339 		dump_bin(&heap->bins[i]);
340 	kprintf("\n");
341 }
342 
343 
344 static void
345 dump_allocator_areas(heap_allocator *heap)
346 {
347 	heap_area *area = heap->all_areas;
348 	while (area) {
349 		kprintf("\tarea %p: area: %ld; base: 0x%08lx; size: %lu; page_count: "
350 			"%lu; free_pages: %p (%lu entr%s)\n", area, area->area, area->base,
351 			area->size, area->page_count, area->free_pages,
352 			area->free_page_count, area->free_page_count == 1 ? "y" : "ies");
353 		area = area->all_next;
354 	}
355 
356 	kprintf("\n");
357 }
358 
359 
360 static void
361 dump_allocator(heap_allocator *heap, bool areas, bool bins)
362 {
363 	kprintf("allocator %p: name: %s; page_size: %lu; bin_count: %lu; pages: "
364 		"%lu; free_pages: %lu; empty_areas: %lu\n", heap, heap->name,
365 		heap->page_size, heap->bin_count, heap->total_pages,
366 		heap->total_free_pages, heap->empty_areas);
367 
368 	if (areas)
369 		dump_allocator_areas(heap);
370 	if (bins)
371 		dump_bin_list(heap);
372 }
373 
374 
375 static int
376 dump_heap_list(int argc, char **argv)
377 {
378 	if (argc == 2 && strcmp(argv[1], "grow") == 0) {
379 		// only dump dedicated grow heap info
380 		kprintf("dedicated grow heap:\n");
381 		dump_allocator(sGrowHeap, true, true);
382 		return 0;
383 	}
384 
385 	bool stats = false;
386 	int i = 1;
387 
388 	if (argc > 1 && strcmp(argv[1], "stats") == 0) {
389 		stats = true;
390 		i++;
391 	}
392 
393 	uint64 heapAddress = 0;
394 	if (i < argc && !evaluate_debug_expression(argv[i], &heapAddress, true)) {
395 		print_debugger_command_usage(argv[0]);
396 		return 0;
397 	}
398 
399 	if (heapAddress == 0) {
400 		// dump default kernel heaps
401 		for (uint32 i = 0; i < sHeapCount; i++)
402 			dump_allocator(sHeaps[i], !stats, !stats);
403 	} else {
404 		// dump specified heap
405 		dump_allocator((heap_allocator*)(addr_t)heapAddress, !stats, !stats);
406 	}
407 
408 	return 0;
409 }
410 
411 
412 #if !KERNEL_HEAP_LEAK_CHECK
413 
414 static int
415 dump_allocations(int argc, char **argv)
416 {
417 	uint64 heapAddress = 0;
418 	bool statsOnly = false;
419 	for (int32 i = 1; i < argc; i++) {
420 		if (strcmp(argv[i], "stats") == 0)
421 			statsOnly = true;
422 		else if (!evaluate_debug_expression(argv[i], &heapAddress, true)) {
423 			print_debugger_command_usage(argv[0]);
424 			return 0;
425 		}
426 	}
427 
428 	size_t totalSize = 0;
429 	uint32 totalCount = 0;
430 	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
431 		heap_allocator *heap = sHeaps[heapIndex];
432 		if (heapAddress != 0)
433 			heap = (heap_allocator *)(addr_t)heapAddress;
434 
435 		// go through all the pages in all the areas
436 		heap_area *area = heap->all_areas;
437 		while (area) {
438 			for (uint32 i = 0; i < area->page_count; i++) {
439 				heap_page *page = &area->page_table[i];
440 				if (!page->in_use)
441 					continue;
442 
443 				addr_t base = area->base + i * heap->page_size;
444 				if (page->bin_index < heap->bin_count) {
445 					// page is used by a small allocation bin
446 					uint32 elementCount = page->empty_index;
447 					size_t elementSize
448 						= heap->bins[page->bin_index].element_size;
449 					for (uint32 j = 0; j < elementCount;
450 							j++, base += elementSize) {
451 						// walk the free list to see if this element is in use
452 						bool elementInUse = true;
453 						for (addr_t *temp = page->free_list; temp != NULL;
454 								temp = (addr_t *)*temp) {
455 							if ((addr_t)temp == base) {
456 								elementInUse = false;
457 								break;
458 							}
459 						}
460 
461 						if (!elementInUse)
462 							continue;
463 
464 						if (!statsOnly) {
465 							kprintf("address: 0x%08lx; size: %lu bytes\n",
466 								base, elementSize);
467 						}
468 
469 						totalSize += elementSize;
470 						totalCount++;
471 					}
472 				} else {
473 					// page is used by a big allocation, find the page count
474 					uint32 pageCount = 1;
475 					while (i + pageCount < area->page_count
476 						&& area->page_table[i + pageCount].in_use
477 						&& area->page_table[i + pageCount].bin_index
478 							== heap->bin_count
479 						&& area->page_table[i + pageCount].allocation_id
480 							== page->allocation_id)
481 						pageCount++;
482 
483 					size_t size = pageCount * heap->page_size;
484 
485 					if (!statsOnly) {
486 						kprintf("address: 0x%08lx; size: %lu bytes\n", base,
487 							size);
488 					}
489 
490 					totalSize += size;
491 					totalCount++;
492 
493 					// skip the allocated pages
494 					i += pageCount - 1;
495 				}
496 			}
497 
498 			area = area->all_next;
499 		}
500 
501 		if (heapAddress != 0)
502 			break;
503 	}
504 
505 	kprintf("total allocations: %lu; total bytes: %lu\n", totalCount, totalSize);
506 	return 0;
507 }
508 
509 #else // !KERNEL_HEAP_LEAK_CHECK
510 
511 static int
512 dump_allocations(int argc, char **argv)
513 {
514 	team_id team = -1;
515 	thread_id thread = -1;
516 	addr_t caller = 0;
517 	addr_t address = 0;
518 	bool statsOnly = false;
519 
520 	for (int32 i = 1; i < argc; i++) {
521 		if (strcmp(argv[i], "team") == 0)
522 			team = parse_expression(argv[++i]);
523 		else if (strcmp(argv[i], "thread") == 0)
524 			thread = parse_expression(argv[++i]);
525 		else if (strcmp(argv[i], "caller") == 0)
526 			caller = parse_expression(argv[++i]);
527 		else if (strcmp(argv[i], "address") == 0)
528 			address = parse_expression(argv[++i]);
529 		else if (strcmp(argv[i], "stats") == 0)
530 			statsOnly = true;
531 		else {
532 			print_debugger_command_usage(argv[0]);
533 			return 0;
534 		}
535 	}
536 
537 	size_t totalSize = 0;
538 	uint32 totalCount = 0;
539 	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
540 		heap_allocator *heap = sHeaps[heapIndex];
541 
542 		// go through all the pages in all the areas
543 		heap_area *area = heap->all_areas;
544 		while (area) {
545 			heap_leak_check_info *info = NULL;
546 			for (uint32 i = 0; i < area->page_count; i++) {
547 				heap_page *page = &area->page_table[i];
548 				if (!page->in_use)
549 					continue;
550 
551 				addr_t base = area->base + i * heap->page_size;
552 				if (page->bin_index < heap->bin_count) {
553 					// page is used by a small allocation bin
554 					uint32 elementCount = page->empty_index;
555 					size_t elementSize
556 						= heap->bins[page->bin_index].element_size;
557 					for (uint32 j = 0; j < elementCount;
558 							j++, base += elementSize) {
559 						// walk the free list to see if this element is in use
560 						bool elementInUse = true;
561 						for (addr_t *temp = page->free_list; temp != NULL;
562 								temp = (addr_t *)*temp) {
563 							if ((addr_t)temp == base) {
564 								elementInUse = false;
565 								break;
566 							}
567 						}
568 
569 						if (!elementInUse)
570 							continue;
571 
572 						info = (heap_leak_check_info *)(base + elementSize
573 							- sizeof(heap_leak_check_info));
574 
575 						if ((team == -1 || info->team == team)
576 							&& (thread == -1 || info->thread == thread)
577 							&& (caller == 0 || info->caller == caller)
578 							&& (address == 0 || base == address)) {
579 							// interesting...
580 							if (!statsOnly) {
581 								kprintf("team: % 6ld; thread: % 6ld; "
582 									"address: 0x%08lx; size: %lu bytes; "
583 									"caller: %#lx\n", info->team, info->thread,
584 									base, info->size, info->caller);
585 							}
586 
587 							totalSize += info->size;
588 							totalCount++;
589 						}
590 					}
591 				} else {
592 					// page is used by a big allocation, find the page count
593 					uint32 pageCount = 1;
594 					while (i + pageCount < area->page_count
595 						&& area->page_table[i + pageCount].in_use
596 						&& area->page_table[i + pageCount].bin_index
597 							== heap->bin_count
598 						&& area->page_table[i + pageCount].allocation_id
599 							== page->allocation_id)
600 						pageCount++;
601 
602 					info = (heap_leak_check_info *)(base + pageCount
603 						* heap->page_size - sizeof(heap_leak_check_info));
604 
605 					if ((team == -1 || info->team == team)
606 						&& (thread == -1 || info->thread == thread)
607 						&& (caller == 0 || info->caller == caller)
608 						&& (address == 0 || base == address)) {
609 						// interesting...
610 						if (!statsOnly) {
611 							kprintf("team: % 6ld; thread: % 6ld;"
612 								" address: 0x%08lx; size: %lu bytes;"
613 								" caller: %#lx\n", info->team, info->thread,
614 								base, info->size, info->caller);
615 						}
616 
617 						totalSize += info->size;
618 						totalCount++;
619 					}
620 
621 					// skip the allocated pages
622 					i += pageCount - 1;
623 				}
624 			}
625 
626 			area = area->all_next;
627 		}
628 	}
629 
630 	kprintf("total allocations: %lu; total bytes: %lu\n", totalCount,
631 		totalSize);
632 	return 0;
633 }
634 
635 
636 static caller_info*
637 get_caller_info(addr_t caller)
638 {
639 	// find the caller info
640 	for (int32 i = 0; i < sCallerInfoCount; i++) {
641 		if (caller == sCallerInfoTable[i].caller)
642 			return &sCallerInfoTable[i];
643 	}
644 
645 	// not found, add a new entry, if there are free slots
646 	if (sCallerInfoCount >= kCallerInfoTableSize)
647 		return NULL;
648 
649 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
650 	info->caller = caller;
651 	info->count = 0;
652 	info->size = 0;
653 
654 	return info;
655 }
656 
657 
658 static int
659 caller_info_compare_size(const void* _a, const void* _b)
660 {
661 	const caller_info* a = (const caller_info*)_a;
662 	const caller_info* b = (const caller_info*)_b;
663 	return (int)(b->size - a->size);
664 }
665 
666 
667 static int
668 caller_info_compare_count(const void* _a, const void* _b)
669 {
670 	const caller_info* a = (const caller_info*)_a;
671 	const caller_info* b = (const caller_info*)_b;
672 	return (int)(b->count - a->count);
673 }
674 
675 
676 static bool
677 analyze_allocation_callers(heap_allocator *heap)
678 {
679 	// go through all the pages in all the areas
680 	heap_area *area = heap->all_areas;
681 	while (area) {
682 		heap_leak_check_info *info = NULL;
683 		for (uint32 i = 0; i < area->page_count; i++) {
684 			heap_page *page = &area->page_table[i];
685 			if (!page->in_use)
686 				continue;
687 
688 			addr_t base = area->base + i * heap->page_size;
689 			if (page->bin_index < heap->bin_count) {
690 				// page is used by a small allocation bin
691 				uint32 elementCount = page->empty_index;
692 				size_t elementSize = heap->bins[page->bin_index].element_size;
693 				for (uint32 j = 0; j < elementCount; j++, base += elementSize) {
694 					// walk the free list to see if this element is in use
695 					bool elementInUse = true;
696 					for (addr_t *temp = page->free_list; temp != NULL;
697 						temp = (addr_t *)*temp) {
698 						if ((addr_t)temp == base) {
699 							elementInUse = false;
700 							break;
701 						}
702 					}
703 
704 					if (!elementInUse)
705 						continue;
706 
707 					info = (heap_leak_check_info *)(base + elementSize
708 						- sizeof(heap_leak_check_info));
709 
710 					caller_info *callerInfo = get_caller_info(info->caller);
711 					if (callerInfo == NULL) {
712 						kprintf("out of space for caller infos\n");
713 						return false;
714 					}
715 
716 					callerInfo->count++;
717 					callerInfo->size += info->size;
718 				}
719 			} else {
720 				// page is used by a big allocation, find the page count
721 				uint32 pageCount = 1;
722 				while (i + pageCount < area->page_count
723 					&& area->page_table[i + pageCount].in_use
724 					&& area->page_table[i + pageCount].bin_index
725 						== heap->bin_count
726 					&& area->page_table[i + pageCount].allocation_id
727 						== page->allocation_id) {
728 					pageCount++;
729 				}
730 
731 				info = (heap_leak_check_info *)(base + pageCount
732 					* heap->page_size - sizeof(heap_leak_check_info));
733 
734 				caller_info *callerInfo = get_caller_info(info->caller);
735 				if (callerInfo == NULL) {
736 					kprintf("out of space for caller infos\n");
737 					return false;
738 				}
739 
740 				callerInfo->count++;
741 				callerInfo->size += info->size;
742 
743 				// skip the allocated pages
744 				i += pageCount - 1;
745 			}
746 		}
747 
748 		area = area->all_next;
749 	}
750 
751 	return true;
752 }
753 
754 
755 static int
756 dump_allocations_per_caller(int argc, char **argv)
757 {
758 	bool sortBySize = true;
759 	heap_allocator *heap = NULL;
760 
761 	for (int32 i = 1; i < argc; i++) {
762 		if (strcmp(argv[i], "-c") == 0) {
763 			sortBySize = false;
764 		} else if (strcmp(argv[i], "-h") == 0) {
765 			uint64 heapAddress;
766 			if (++i >= argc
767 				|| !evaluate_debug_expression(argv[i], &heapAddress, true)) {
768 				print_debugger_command_usage(argv[0]);
769 				return 0;
770 			}
771 
772 			heap = (heap_allocator*)(addr_t)heapAddress;
773 		} else {
774 			print_debugger_command_usage(argv[0]);
775 			return 0;
776 		}
777 	}
778 
779 	sCallerInfoCount = 0;
780 
781 	if (heap != NULL) {
782 		if (!analyze_allocation_callers(heap))
783 			return 0;
784 	} else {
785 		for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
786 			if (!analyze_allocation_callers(sHeaps[heapIndex]))
787 				return 0;
788 		}
789 	}
790 
791 	// sort the array
792 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
793 		sortBySize ? &caller_info_compare_size : &caller_info_compare_count);
794 
795 	kprintf("%ld different callers, sorted by %s...\n\n", sCallerInfoCount,
796 		sortBySize ? "size" : "count");
797 
798 	kprintf("     count        size      caller\n");
799 	kprintf("----------------------------------\n");
800 	for (int32 i = 0; i < sCallerInfoCount; i++) {
801 		caller_info& info = sCallerInfoTable[i];
802 		kprintf("%10ld  %10ld  %#08lx", info.count, info.size, info.caller);
803 
804 		const char *symbol;
805 		const char *imageName;
806 		bool exactMatch;
807 		addr_t baseAddress;
808 
809 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
810 				&imageName, &exactMatch) == B_OK) {
811 			kprintf("  %s + 0x%lx (%s)%s\n", symbol,
812 				info.caller - baseAddress, imageName,
813 				exactMatch ? "" : " (nearest)");
814 		} else
815 			kprintf("\n");
816 	}
817 
818 	return 0;
819 }
820 
821 #endif // KERNEL_HEAP_LEAK_CHECK
822 
823 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
824 
825 
826 #if PARANOID_HEAP_VALIDATION
827 static void
828 heap_validate_heap(heap_allocator *heap)
829 {
830 	ReadLocker areaReadLocker(heap->area_lock);
831 	for (uint32 i = 0; i < heap->bin_count; i++)
832 		mutex_lock(&heap->bins[i].lock);
833 	MutexLocker pageLocker(heap->page_lock);
834 
835 	uint32 totalPageCount = 0;
836 	uint32 totalFreePageCount = 0;
837 	heap_area *area = heap->all_areas;
838 	while (area != NULL) {
839 		// validate the free pages list
840 		uint32 freePageCount = 0;
841 		heap_page *lastPage = NULL;
842 		heap_page *page = area->free_pages;
843 		while (page) {
844 			if ((addr_t)page < (addr_t)&area->page_table[0]
845 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
846 				panic("free page is not part of the page table\n");
847 
848 			if (page->index >= area->page_count)
849 				panic("free page has invalid index\n");
850 
851 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
852 				panic("free page index does not lead to target page\n");
853 
854 			if (page->prev != lastPage)
855 				panic("free page entry has invalid prev link\n");
856 
857 			if (page->in_use)
858 				panic("free page marked as in use\n");
859 
860 			lastPage = page;
861 			page = page->next;
862 			freePageCount++;
863 		}
864 
865 		totalPageCount += freePageCount;
866 		totalFreePageCount += freePageCount;
867 		if (area->free_page_count != freePageCount)
868 			panic("free page count doesn't match free page list\n");
869 
870 		// validate the page table
871 		uint32 usedPageCount = 0;
872 		for (uint32 i = 0; i < area->page_count; i++) {
873 			if (area->page_table[i].in_use)
874 				usedPageCount++;
875 		}
876 
877 		totalPageCount += usedPageCount;
878 		if (freePageCount + usedPageCount != area->page_count) {
879 			panic("free pages and used pages do not add up (%lu + %lu != %lu)\n",
880 				freePageCount, usedPageCount, area->page_count);
881 		}
882 
883 		area = area->all_next;
884 	}
885 
886 	// validate the areas
887 	area = heap->areas;
888 	heap_area *lastArea = NULL;
889 	uint32 lastFreeCount = 0;
890 	while (area != NULL) {
891 		if (area->free_page_count < lastFreeCount)
892 			panic("size ordering of area list broken\n");
893 
894 		if (area->prev != lastArea)
895 			panic("area list entry has invalid prev link\n");
896 
897 		lastArea = area;
898 		lastFreeCount = area->free_page_count;
899 		area = area->next;
900 	}
901 
902 	lastArea = NULL;
903 	area = heap->all_areas;
904 	while (area != NULL) {
905 		if (lastArea != NULL && lastArea->base < area->base)
906 			panic("base ordering of all_areas list broken\n");
907 
908 		lastArea = area;
909 		area = area->all_next;
910 	}
911 
912 	// validate the bins
913 	for (uint32 i = 0; i < heap->bin_count; i++) {
914 		heap_bin *bin = &heap->bins[i];
915 		heap_page *lastPage = NULL;
916 		heap_page *page = bin->page_list;
917 		lastFreeCount = 0;
918 		while (page) {
919 			area = heap->all_areas;
920 			while (area) {
921 				if (area == page->area)
922 					break;
923 				area = area->all_next;
924 			}
925 
926 			if (area == NULL) {
927 				panic("page area not present in area list\n");
928 				page = page->next;
929 				continue;
930 			}
931 
932 			if ((addr_t)page < (addr_t)&area->page_table[0]
933 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
934 				panic("used page is not part of the page table\n");
935 
936 			if (page->index >= area->page_count)
937 				panic("used page has invalid index\n");
938 
939 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
940 				panic("used page index does not lead to target page\n");
941 
942 			if (page->prev != lastPage) {
943 				panic("used page entry has invalid prev link (%p vs %p bin "
944 					"%lu)\n", page->prev, lastPage, i);
945 			}
946 
947 			if (!page->in_use)
948 				panic("used page marked as not in use\n");
949 
950 			if (page->bin_index != i) {
951 				panic("used page with bin index %u in page list of bin %lu\n",
952 					page->bin_index, i);
953 			}
954 
955 			if (page->free_count < lastFreeCount)
956 				panic("ordering of bin page list broken\n");
957 
958 			// validate the free list
959 			uint32 freeSlotsCount = 0;
960 			addr_t *element = page->free_list;
961 			addr_t pageBase = area->base + page->index * heap->page_size;
962 			while (element) {
963 				if ((addr_t)element < pageBase
964 					|| (addr_t)element >= pageBase + heap->page_size)
965 					panic("free list entry out of page range\n");
966 
967 				if (((addr_t)element - pageBase) % bin->element_size != 0)
968 					panic("free list entry not on a element boundary\n");
969 
970 				element = (addr_t *)*element;
971 				freeSlotsCount++;
972 			}
973 
974 			uint32 slotCount = bin->max_free_count;
975 			if (page->empty_index > slotCount) {
976 				panic("empty index beyond slot count (%u with %lu slots)\n",
977 					page->empty_index, slotCount);
978 			}
979 
980 			freeSlotsCount += (slotCount - page->empty_index);
981 			if (freeSlotsCount > slotCount)
982 				panic("more free slots than fit into the page\n");
983 
984 			lastPage = page;
985 			lastFreeCount = page->free_count;
986 			page = page->next;
987 		}
988 	}
989 
990 	pageLocker.Unlock();
991 	for (uint32 i = 0; i < heap->bin_count; i++)
992 		mutex_unlock(&heap->bins[i].lock);
993 	areaReadLocker.Unlock();
994 }
995 #endif // PARANOID_HEAP_VALIDATION
996 
997 
998 // #pragma mark - Heap functions
999 
1000 
1001 void
1002 heap_add_area(heap_allocator *heap, area_id areaID, addr_t base, size_t size)
1003 {
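	// The area memory is laid out as: [heap_area header][page table][usable
	// pages, rounded to B_PAGE_SIZE]. The code below carves the raw range up
	// accordingly and then links the area into the heap's lists.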
1004 	heap_area *area = (heap_area *)base;
1005 	area->area = areaID;
1006 
1007 	base += sizeof(heap_area);
1008 	size -= sizeof(heap_area);
1009 
1010 	uint32 pageCount = size / heap->page_size;
1011 	size_t pageTableSize = pageCount * sizeof(heap_page);
1012 	area->page_table = (heap_page *)base;
1013 	base += pageTableSize;
1014 	size -= pageTableSize;
1015 
1016 	// the rest is now actually usable memory (rounded to the next page)
1017 	area->base = ROUNDUP(base, B_PAGE_SIZE);
1018 	area->size = size & ~(B_PAGE_SIZE - 1);
1019 
1020 	// now we know the real page count
1021 	pageCount = area->size / heap->page_size;
1022 	area->page_count = pageCount;
1023 
1024 	// zero out the page table and fill in page indexes
1025 	memset((void *)area->page_table, 0, pageTableSize);
1026 	for (uint32 i = 0; i < pageCount; i++) {
1027 		area->page_table[i].area = area;
1028 		area->page_table[i].index = i;
1029 	}
1030 
1031 	// add all pages to the free pages list
1032 	for (uint32 i = 1; i < pageCount; i++) {
1033 		area->page_table[i - 1].next = &area->page_table[i];
1034 		area->page_table[i].prev = &area->page_table[i - 1];
1035 	}
1036 	area->free_pages = &area->page_table[0];
1037 	area->free_page_count = pageCount;
1038 	area->page_table[0].prev = NULL;
1039 	area->next = NULL;
1040 
1041 	WriteLocker areaWriteLocker(heap->area_lock);
1042 	MutexLocker pageLocker(heap->page_lock);
1043 	if (heap->areas == NULL) {
1044 		// it's the only (empty) area in that heap
1045 		area->prev = NULL;
1046 		heap->areas = area;
1047 	} else {
1048 		// link in this area as the last one as it is completely empty
1049 		heap_area *lastArea = heap->areas;
1050 		while (lastArea->next != NULL)
1051 			lastArea = lastArea->next;
1052 
1053 		lastArea->next = area;
1054 		area->prev = lastArea;
1055 	}
1056 
1057 	// insert this area in the all_areas list so it stays ordered by base
1058 	if (heap->all_areas == NULL || heap->all_areas->base < area->base) {
1059 		area->all_next = heap->all_areas;
1060 		heap->all_areas = area;
1061 	} else {
1062 		heap_area *insert = heap->all_areas;
1063 		while (insert->all_next && insert->all_next->base > area->base)
1064 			insert = insert->all_next;
1065 
1066 		area->all_next = insert->all_next;
1067 		insert->all_next = area;
1068 	}
1069 
1070 	heap->total_pages += area->page_count;
1071 	heap->total_free_pages += area->free_page_count;
1072 
1073 	if (areaID >= 0) {
1074 		// this area is deletable later on and still completely empty - the empty
1075 		// count will be decremented as soon as the area is used for the first time
1076 		heap->empty_areas++;
1077 	}
1078 
1079 	pageLocker.Unlock();
1080 	areaWriteLocker.Unlock();
1081 
1082 	dprintf("heap_add_area: area %ld added to %s heap %p - usable range 0x%08lx "
1083 		"- 0x%08lx\n", area->area, heap->name, heap, area->base,
1084 		area->base + area->size);
1085 }
1086 
1087 
1088 static status_t
1089 heap_remove_area(heap_allocator *heap, heap_area *area)
1090 {
1091 	if (area->free_page_count != area->page_count) {
1092 		panic("tried removing heap area that has still pages in use");
1093 		return B_ERROR;
1094 	}
1095 
1096 	if (area->prev == NULL && area->next == NULL) {
1097 		panic("tried removing the last non-full heap area");
1098 		return B_ERROR;
1099 	}
1100 
1101 	if (heap->areas == area)
1102 		heap->areas = area->next;
1103 	if (area->prev != NULL)
1104 		area->prev->next = area->next;
1105 	if (area->next != NULL)
1106 		area->next->prev = area->prev;
1107 
1108 	if (heap->all_areas == area)
1109 		heap->all_areas = area->all_next;
1110 	else {
1111 		heap_area *previous = heap->all_areas;
1112 		while (previous) {
1113 			if (previous->all_next == area) {
1114 				previous->all_next = area->all_next;
1115 				break;
1116 			}
1117 
1118 			previous = previous->all_next;
1119 		}
1120 
1121 		if (previous == NULL)
1122 			panic("removing heap area that is not in all list");
1123 	}
1124 
1125 	heap->total_pages -= area->page_count;
1126 	heap->total_free_pages -= area->free_page_count;
1127 
1128 	dprintf("heap_remove_area: area %ld with range 0x%08lx - 0x%08lx removed "
1129 		"from %s heap %p\n", area->area, area->base, area->base + area->size,
1130 		heap->name, heap);
1131 
1132 	return B_OK;
1133 }
1134 
1135 
1136 heap_allocator *
1137 heap_create_allocator(const char *name, addr_t base, size_t size,
1138 	const heap_class *heapClass, bool allocateOnHeap)
1139 {
1140 	heap_allocator *heap;
1141 	if (allocateOnHeap) {
1142 		// allocate separately on the heap
1143 		heap = (heap_allocator *)malloc(sizeof(heap_allocator)
1144 			+ sizeof(heap_bin) * MAX_BIN_COUNT);
1145 	} else {
1146 		// use up the first part of the area
1147 		heap = (heap_allocator *)base;
1148 		base += sizeof(heap_allocator);
1149 		size -= sizeof(heap_allocator);
1150 	}
1151 
1152 	heap->name = name;
1153 	heap->page_size = heapClass->page_size;
1154 	heap->total_pages = heap->total_free_pages = heap->empty_areas = 0;
1155 	heap->areas = heap->all_areas = NULL;
1156 	heap->bins = (heap_bin *)((addr_t)heap + sizeof(heap_allocator));
1157 
1158 #if KERNEL_HEAP_LEAK_CHECK
1159 	heap->get_caller = &get_caller;
1160 #endif
1161 
1162 	heap->bin_count = 0;
1163 	size_t binSize = 0, lastSize = 0;
1164 	uint32 count = heap->page_size / heapClass->min_bin_size;
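	// Derive the bin sizes from the heap class: for every candidate element
	// count per page (from the maximum down to min_count_per_page) the element
	// size is page_size / count rounded down to bin_alignment; candidates that
	// repeat the previous size or would waste more than max_waste_per_page
	// bytes of a page are skipped.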
1165 	for (; count >= heapClass->min_count_per_page; count--, lastSize = binSize) {
1166 		if (heap->bin_count >= MAX_BIN_COUNT)
1167 			panic("heap configuration invalid - max bin count reached\n");
1168 
1169 		binSize = (heap->page_size / count) & ~(heapClass->bin_alignment - 1);
1170 		if (binSize == lastSize)
1171 			continue;
1172 		if (heap->page_size - count * binSize > heapClass->max_waste_per_page)
1173 			continue;
1174 
1175 		heap_bin *bin = &heap->bins[heap->bin_count];
1176 		mutex_init(&bin->lock, "heap bin lock");
1177 		bin->element_size = binSize;
1178 		bin->max_free_count = heap->page_size / binSize;
1179 		bin->page_list = NULL;
1180 		heap->bin_count++;
1181 	}
1182 
1183 	if (!allocateOnHeap) {
1184 		base += heap->bin_count * sizeof(heap_bin);
1185 		size -= heap->bin_count * sizeof(heap_bin);
1186 	}
1187 
1188 	rw_lock_init(&heap->area_lock, "heap area rw lock");
1189 	mutex_init(&heap->page_lock, "heap page lock");
1190 
1191 	heap_add_area(heap, -1, base, size);
1192 	return heap;
1193 }
1194 
1195 
1196 static inline void
1197 heap_free_pages_added(heap_allocator *heap, heap_area *area, uint32 pageCount)
1198 {
1199 	area->free_page_count += pageCount;
1200 	heap->total_free_pages += pageCount;
1201 
1202 	if (area->free_page_count == pageCount) {
1203 		// we need to add ourselves to the area list of the heap
1204 		area->prev = NULL;
1205 		area->next = heap->areas;
1206 		if (area->next)
1207 			area->next->prev = area;
1208 		heap->areas = area;
1209 	} else {
1210 		// we might need to move back in the area list
1211 		if (area->next && area->next->free_page_count < area->free_page_count) {
1212 			// move ourselves so the list stays ordered
1213 			heap_area *insert = area->next;
1214 			while (insert->next
1215 				&& insert->next->free_page_count < area->free_page_count)
1216 				insert = insert->next;
1217 
1218 			if (area->prev)
1219 				area->prev->next = area->next;
1220 			if (area->next)
1221 				area->next->prev = area->prev;
1222 			if (heap->areas == area)
1223 				heap->areas = area->next;
1224 
1225 			area->prev = insert;
1226 			area->next = insert->next;
1227 			if (area->next)
1228 				area->next->prev = area;
1229 			insert->next = area;
1230 		}
1231 	}
1232 
1233 	if (area->free_page_count == area->page_count && area->area >= 0)
1234 		heap->empty_areas++;
1235 }
1236 
1237 
1238 static inline void
1239 heap_free_pages_removed(heap_allocator *heap, heap_area *area, uint32 pageCount)
1240 {
1241 	if (area->free_page_count == area->page_count && area->area >= 0) {
1242 		// this area was completely empty
1243 		heap->empty_areas--;
1244 	}
1245 
1246 	area->free_page_count -= pageCount;
1247 	heap->total_free_pages -= pageCount;
1248 
1249 	if (area->free_page_count == 0) {
1250 		// the area is now full so we remove it from the area list
1251 		if (area->prev)
1252 			area->prev->next = area->next;
1253 		if (area->next)
1254 			area->next->prev = area->prev;
1255 		if (heap->areas == area)
1256 			heap->areas = area->next;
1257 		area->next = area->prev = NULL;
1258 	} else {
1259 		// we might need to move forward in the area list
1260 		if (area->prev && area->prev->free_page_count > area->free_page_count) {
1261 			// move ourselves so the list stays ordered
1262 			heap_area *insert = area->prev;
1263 			while (insert->prev
1264 				&& insert->prev->free_page_count > area->free_page_count)
1265 				insert = insert->prev;
1266 
1267 			if (area->prev)
1268 				area->prev->next = area->next;
1269 			if (area->next)
1270 				area->next->prev = area->prev;
1271 
1272 			area->prev = insert->prev;
1273 			area->next = insert;
1274 			if (area->prev)
1275 				area->prev->next = area;
1276 			if (heap->areas == insert)
1277 				heap->areas = area;
1278 			insert->prev = area;
1279 		}
1280 	}
1281 }
1282 
1283 
1284 static inline void
1285 heap_link_page(heap_page *page, heap_page **list)
1286 {
1287 	page->prev = NULL;
1288 	page->next = *list;
1289 	if (page->next)
1290 		page->next->prev = page;
1291 	*list = page;
1292 }
1293 
1294 
1295 static inline void
1296 heap_unlink_page(heap_page *page, heap_page **list)
1297 {
1298 	if (page->prev)
1299 		page->prev->next = page->next;
1300 	if (page->next)
1301 		page->next->prev = page->prev;
1302 	if (list && *list == page) {
1303 		*list = page->next;
1304 		if (page->next)
1305 			page->next->prev = NULL;
1306 	}
1307 }
1308 
1309 
1310 static heap_page *
1311 heap_allocate_contiguous_pages(heap_allocator *heap, uint32 pageCount,
1312 	size_t alignment)
1313 {
1314 	MutexLocker pageLocker(heap->page_lock);
1315 	heap_area *area = heap->areas;
1316 	while (area) {
1317 		if (area->free_page_count < pageCount) {
1318 			area = area->next;
1319 			continue;
1320 		}
1321 
1322 		uint32 step = 1;
1323 		uint32 firstValid = 0;
1324 		const uint32 lastValid = area->page_count - pageCount + 1;
1325 
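		// If the requested alignment exceeds the page size, only every step-th
		// page can start the allocation; firstValid is the index of the first
		// page in this area whose address satisfies the alignment.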
1326 		if (alignment > heap->page_size) {
1327 			firstValid = (ROUNDUP(area->base, alignment) - area->base)
1328 				/ heap->page_size;
1329 			step = alignment / heap->page_size;
1330 		}
1331 
1332 		int32 first = -1;
1333 		for (uint32 i = firstValid; i < lastValid; i += step) {
1334 			if (area->page_table[i].in_use)
1335 				continue;
1336 
1337 			first = i;
1338 
1339 			for (uint32 j = 1; j < pageCount; j++) {
1340 				if (area->page_table[i + j].in_use) {
1341 					first = -1;
1342 					i += j / step * step;
1343 					break;
1344 				}
1345 			}
1346 
1347 			if (first >= 0)
1348 				break;
1349 		}
1350 
1351 		if (first < 0) {
1352 			area = area->next;
1353 			continue;
1354 		}
1355 
1356 		for (uint32 i = first; i < first + pageCount; i++) {
1357 			heap_page *page = &area->page_table[i];
1358 			page->in_use = 1;
1359 			page->bin_index = heap->bin_count;
1360 
1361 			heap_unlink_page(page, &area->free_pages);
1362 
1363 			page->next = page->prev = NULL;
1364 			page->free_list = NULL;
1365 			page->allocation_id = (uint16)first;
1366 		}
1367 
1368 		heap_free_pages_removed(heap, area, pageCount);
1369 		return &area->page_table[first];
1370 	}
1371 
1372 	return NULL;
1373 }
1374 
1375 
1376 #if KERNEL_HEAP_LEAK_CHECK
1377 static void
1378 heap_add_leak_check_info(heap_allocator *heap, addr_t address, size_t allocated,
1379 	size_t size)
1380 {
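	// The leak check info occupies the last sizeof(heap_leak_check_info) bytes
	// of the allocated space, which is why allocation sizes are padded by that
	// amount when leak checking is enabled.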
1381 	heap_leak_check_info *info = (heap_leak_check_info *)(address + allocated
1382 		- sizeof(heap_leak_check_info));
1383 	info->size = size - sizeof(heap_leak_check_info);
1384 	info->thread = (gKernelStartup ? 0 : thread_get_current_thread_id());
1385 	info->team = (gKernelStartup ? 0 : team_get_current_team_id());
1386 	info->caller = heap->get_caller();
1387 }
1388 #endif
1389 
1390 
1391 static void *
1392 heap_raw_alloc(heap_allocator *heap, size_t size, size_t alignment)
1393 {
1394 	TRACE(("heap %p: allocate %lu bytes from raw pages with alignment %lu\n",
1395 		heap, size, alignment));
1396 
1397 	uint32 pageCount = (size + heap->page_size - 1) / heap->page_size;
1398 	heap_page *firstPage = heap_allocate_contiguous_pages(heap, pageCount,
1399 		alignment);
1400 	if (firstPage == NULL) {
1401 		TRACE(("heap %p: found no contiguous pages to allocate %ld bytes\n",
1402 			heap, size));
1403 		return NULL;
1404 	}
1405 
1406 	addr_t address = firstPage->area->base + firstPage->index * heap->page_size;
1407 #if KERNEL_HEAP_LEAK_CHECK
1408 	heap_add_leak_check_info(heap, address, pageCount * heap->page_size, size);
1409 #endif
1410 	return (void *)address;
1411 }
1412 
1413 
1414 static void *
1415 heap_allocate_from_bin(heap_allocator *heap, uint32 binIndex, size_t size)
1416 {
1417 	heap_bin *bin = &heap->bins[binIndex];
1418 	TRACE(("heap %p: allocate %lu bytes from bin %lu with element_size %lu\n",
1419 		heap, size, binIndex, bin->element_size));
1420 
1421 	MutexLocker binLocker(bin->lock);
1422 	heap_page *page = bin->page_list;
1423 	if (page == NULL) {
1424 		MutexLocker pageLocker(heap->page_lock);
1425 		heap_area *area = heap->areas;
1426 		if (area == NULL) {
1427 			TRACE(("heap %p: no free pages to allocate %lu bytes\n", heap,
1428 				size));
1429 			return NULL;
1430 		}
1431 
1432 		// by design there are only areas in the list that still have
1433 		// free pages available
1434 		page = area->free_pages;
1435 		area->free_pages = page->next;
1436 		if (page->next)
1437 			page->next->prev = NULL;
1438 
1439 		heap_free_pages_removed(heap, area, 1);
1440 
1441 		if (page->in_use)
1442 			panic("got an in use page %p from the free pages list\n", page);
1443 		page->in_use = 1;
1444 
1445 		pageLocker.Unlock();
1446 
1447 		page->bin_index = binIndex;
1448 		page->free_count = bin->max_free_count;
1449 		page->empty_index = 0;
1450 		page->free_list = NULL;
1451 		page->next = page->prev = NULL;
1452 		bin->page_list = page;
1453 	}
1454 
1455 	// we now have a page with at least one free slot
1456 	void *address = NULL;
1457 	if (page->free_list) {
1458 		// there's a previously freed entry we can use
1459 		address = page->free_list;
1460 		page->free_list = (addr_t *)*page->free_list;
1461 	} else {
1462 		// the page hasn't been fully allocated so use the next empty_index
1463 		address = (void *)(page->area->base + page->index * heap->page_size
1464 			+ page->empty_index * bin->element_size);
1465 		page->empty_index++;
1466 	}
1467 
1468 	page->free_count--;
1469 	if (page->free_count == 0) {
1470 		// the page is now full so we remove it from the page_list
1471 		bin->page_list = page->next;
1472 		if (page->next)
1473 			page->next->prev = NULL;
1474 		page->next = page->prev = NULL;
1475 	}
1476 
1477 #if KERNEL_HEAP_LEAK_CHECK
1478 	binLocker.Unlock();
1479 	heap_add_leak_check_info(heap, (addr_t)address, bin->element_size, size);
1480 #endif
1481 	return address;
1482 }
1483 
1484 
1485 static bool
1486 is_valid_alignment(size_t number)
1487 {
1488 	// this cryptic line accepts zero and all powers of two
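	// (~number + 1) is the two's complement -number; for a power of two 2^k it
	// has all bits from bit k upwards set, while ((number << 1) - 1) sets all
	// bits up to and including bit k. Only for zero (where the second term is
	// already ~0) and for powers of two does the OR cover every bit and
	// compare equal to ~0UL.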
1489 	return ((~number + 1) | ((number << 1) - 1)) == ~0UL;
1490 }
1491 
1492 
1493 inline bool
1494 heap_should_grow(heap_allocator *heap)
1495 {
1496 	// suggest growing if there is less than 20% of a grow size available
1497 	return heap->total_free_pages * heap->page_size < HEAP_GROW_SIZE / 5;
1498 }
1499 
1500 
1501 void *
1502 heap_memalign(heap_allocator *heap, size_t alignment, size_t size)
1503 {
1504 	TRACE(("memalign(alignment = %lu, size = %lu)\n", alignment, size));
1505 
1506 #if DEBUG
1507 	if (!is_valid_alignment(alignment))
1508 		panic("memalign() with an alignment which is not a power of 2\n");
1509 #endif
1510 
1511 #if KERNEL_HEAP_LEAK_CHECK
1512 	size += sizeof(heap_leak_check_info);
1513 #endif
1514 
1515 	void *address = NULL;
1516 	if (alignment < B_PAGE_SIZE) {
1517 		if (alignment != 0) {
1518 			// TODO: The alignment is done by ensuring that the element size
1519 			// of the target bin is aligned with the requested alignment. This
1520 			// has the problem that it wastes space because a better (smaller)
1521 			// bin could possibly be selected. We should pick the best bin and
1522 			// check if there is an aligned block in the free list or if a new
1523 			// (page aligned) page has to be allocated anyway.
1524 			size = ROUNDUP(size, alignment);
1525 			for (uint32 i = 0; i < heap->bin_count; i++) {
1526 				if (size <= heap->bins[i].element_size
1527 					&& is_valid_alignment(heap->bins[i].element_size)) {
1528 					address = heap_allocate_from_bin(heap, i, size);
1529 					break;
1530 				}
1531 			}
1532 		} else {
1533 			for (uint32 i = 0; i < heap->bin_count; i++) {
1534 				if (size <= heap->bins[i].element_size) {
1535 					address = heap_allocate_from_bin(heap, i, size);
1536 					break;
1537 				}
1538 			}
1539 		}
1540 	}
1541 
1542 	if (address == NULL)
1543 		address = heap_raw_alloc(heap, size, alignment);
1544 
1545 #if KERNEL_HEAP_LEAK_CHECK
1546 	size -= sizeof(heap_leak_check_info);
1547 #endif
1548 
1549 	TRACE(("memalign(): asked to allocate %lu bytes, returning pointer %p\n",
1550 		size, address));
1551 
1552 	T(Allocate((addr_t)address, size));
1553 	if (address == NULL)
1554 		return address;
1555 
1556 #if PARANOID_KERNEL_MALLOC
1557 	memset(address, 0xcc, size);
1558 #endif
1559 
1560 #if PARANOID_KERNEL_FREE
1561 	// make sure 0xdeadbeef is cleared if we do not overwrite the memory
1562 	// and the user does not clear it
1563 	if (((uint32 *)address)[1] == 0xdeadbeef)
1564 		((uint32 *)address)[1] = 0xcccccccc;
1565 #endif
1566 
1567 	return address;
1568 }
1569 
1570 
1571 status_t
1572 heap_free(heap_allocator *heap, void *address)
1573 {
1574 	if (address == NULL)
1575 		return B_OK;
1576 
1577 	ReadLocker areaReadLocker(heap->area_lock);
1578 	heap_area *area = heap->all_areas;
1579 	while (area) {
1580 		// since the all_areas list is ordered by base with the biggest
1581 		// base at the top, we need only find the first area with a base
1582 		// smaller than our address to become our only candidate for freeing
1583 		if (area->base <= (addr_t)address) {
1584 			if ((addr_t)address >= area->base + area->size) {
1585 				// none of the other areas can contain the address as the list
1586 				// is ordered
1587 				return B_ENTRY_NOT_FOUND;
1588 			}
1589 
1590 			// this area contains the allocation, we're done searching
1591 			break;
1592 		}
1593 
1594 		area = area->all_next;
1595 	}
1596 
1597 	if (area == NULL) {
1598 		// this address does not belong to us
1599 		return B_ENTRY_NOT_FOUND;
1600 	}
1601 
1602 	TRACE(("free(): asked to free pointer %p\n", address));
1603 
1604 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1605 		/ heap->page_size];
1606 
1607 	TRACE(("free(): page %p: bin_index %d, free_count %d\n", page,
1608 		page->bin_index, page->free_count));
1609 
1610 	if (page->bin_index > heap->bin_count) {
1611 		panic("free(): page %p: invalid bin_index %d\n", page, page->bin_index);
1612 		return B_ERROR;
1613 	}
1614 
1615 	if (page->bin_index < heap->bin_count) {
1616 		// small allocation
1617 		heap_bin *bin = &heap->bins[page->bin_index];
1618 
1619 #if PARANOID_KERNEL_FREE
1620 		if (((uint32 *)address)[1] == 0xdeadbeef) {
1621 			// This block looks like it was freed already, walk the free list
1622 			// on this page to make sure this address doesn't exist.
1623 			MutexLocker binLocker(bin->lock);
1624 			for (addr_t *temp = page->free_list; temp != NULL;
1625 					temp = (addr_t *)*temp) {
1626 				if (temp == address) {
1627 					panic("free(): address %p already exists in page free "
1628 						"list\n", address);
1629 					return B_ERROR;
1630 				}
1631 			}
1632 		}
1633 
1634 		// the first 4 bytes are overwritten with the next free list pointer
1635 		// later
1636 		uint32 *dead = (uint32 *)address;
1637 		for (uint32 i = 1; i < bin->element_size / sizeof(uint32); i++)
1638 			dead[i] = 0xdeadbeef;
1639 #endif
1640 
1641 		MutexLocker binLocker(bin->lock);
1642 		if (((addr_t)address - area->base - page->index
1643 			* heap->page_size) % bin->element_size != 0) {
1644 			panic("free(): passed invalid pointer %p supposed to be in bin for "
1645 				"element size %ld\n", address, bin->element_size);
1646 			return B_ERROR;
1647 		}
1648 
1649 		// add the address to the page free list
1650 		*(addr_t *)address = (addr_t)page->free_list;
1651 		page->free_list = (addr_t *)address;
1652 		page->free_count++;
1653 
1654 		if (page->free_count == bin->max_free_count) {
1655 			// we are now empty, remove the page from the bin list
1656 			MutexLocker pageLocker(heap->page_lock);
1657 			heap_unlink_page(page, &bin->page_list);
1658 			page->in_use = 0;
1659 			heap_link_page(page, &area->free_pages);
1660 			heap_free_pages_added(heap, area, 1);
1661 		} else if (page->free_count == 1) {
1662 			// we need to add ourselves to the page list of the bin
1663 			heap_link_page(page, &bin->page_list);
1664 		} else {
1665 			// we might need to move back in the bin's page list
1666 			if (page->next && page->next->free_count < page->free_count) {
1667 				// move ourselves so the list stays ordered
1668 				heap_page *insert = page->next;
1669 				while (insert->next
1670 					&& insert->next->free_count < page->free_count)
1671 					insert = insert->next;
1672 
1673 				heap_unlink_page(page, &bin->page_list);
1674 
1675 				page->prev = insert;
1676 				page->next = insert->next;
1677 				if (page->next)
1678 					page->next->prev = page;
1679 				insert->next = page;
1680 			}
1681 		}
1682 	} else {
1683 		// large allocation, just return the pages to the page free list
1684 		uint32 allocationID = page->allocation_id;
1685 		uint32 maxPages = area->page_count - page->index;
1686 		uint32 pageCount = 0;
1687 
1688 		MutexLocker pageLocker(heap->page_lock);
1689 		for (uint32 i = 0; i < maxPages; i++) {
1690 			// loop until we find the end of this allocation
1691 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1692 				|| page[i].allocation_id != allocationID)
1693 				break;
1694 
1695 			// this page still belongs to the same allocation
1696 			page[i].in_use = 0;
1697 			page[i].allocation_id = 0;
1698 
1699 			// return it to the free list
1700 			heap_link_page(&page[i], &area->free_pages);
1701 			pageCount++;
1702 		}
1703 
1704 		heap_free_pages_added(heap, area, pageCount);
1705 	}
1706 
1707 	T(Free((addr_t)address));
1708 	areaReadLocker.Unlock();
1709 
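	// If more than one area is completely empty, return the surplus ones to
	// the VM; a single empty area is kept as a reserve so the heap does not
	// have to grow again right away.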
1710 	if (heap->empty_areas > 1) {
1711 		WriteLocker areaWriteLocker(heap->area_lock);
1712 		MutexLocker pageLocker(heap->page_lock);
1713 
1714 		area = heap->areas;
1715 		while (area != NULL && heap->empty_areas > 1) {
1716 			heap_area *next = area->next;
1717 			if (area->area >= 0
1718 				&& area->free_page_count == area->page_count
1719 				&& heap_remove_area(heap, area) == B_OK) {
1720 				delete_area(area->area);
1721 				heap->empty_areas--;
1722 			}
1723 
1724 			area = next;
1725 		}
1726 	}
1727 
1728 	return B_OK;
1729 }
1730 
1731 
1732 #if KERNEL_HEAP_LEAK_CHECK
1733 extern "C" void
1734 heap_set_get_caller(heap_allocator* heap, addr_t (*getCaller)())
1735 {
1736 	heap->get_caller = getCaller;
1737 }
1738 #endif
1739 
1740 
1741 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
1742 
1743 
1744 static status_t
1745 heap_realloc(heap_allocator *heap, void *address, void **newAddress,
1746 	size_t newSize)
1747 {
1748 	ReadLocker areaReadLocker(heap->area_lock);
1749 	heap_area *area = heap->all_areas;
1750 	while (area) {
1751 		// since the all_areas list is ordered by base with the biggest
1752 		// base at the top, we need only find the first area with a base
1753 		// smaller than our address to become our only candidate for
1754 		// reallocating
1755 		if (area->base <= (addr_t)address) {
1756 			if ((addr_t)address >= area->base + area->size) {
1757 				// none of the other areas can contain the address as the list
1758 				// is ordered
1759 				return B_ENTRY_NOT_FOUND;
1760 			}
1761 
1762 			// this area contains the allocation, we're done searching
1763 			break;
1764 		}
1765 
1766 		area = area->all_next;
1767 	}
1768 
1769 	if (area == NULL) {
1770 		// this address does not belong to us
1771 		return B_ENTRY_NOT_FOUND;
1772 	}
1773 
1774 	TRACE(("realloc(address = %p, newSize = %lu)\n", address, newSize));
1775 
1776 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1777 		/ heap->page_size];
1778 	if (page->bin_index > heap->bin_count) {
1779 		panic("realloc(): page %p: invalid bin_index %d\n", page,
1780 			page->bin_index);
1781 		return B_ERROR;
1782 	}
1783 
1784 	// find out the size of the old allocation first
1785 	size_t minSize = 0;
1786 	size_t maxSize = 0;
1787 	if (page->bin_index < heap->bin_count) {
1788 		// this was a small allocation
1789 		heap_bin *bin = &heap->bins[page->bin_index];
1790 		maxSize = bin->element_size;
1791 		if (page->bin_index > 0)
1792 			minSize = heap->bins[page->bin_index - 1].element_size + 1;
1793 	} else {
1794 		// this was a large allocation
1795 		uint32 allocationID = page->allocation_id;
1796 		uint32 maxPages = area->page_count - page->index;
1797 		maxSize = heap->page_size;
1798 
1799 		MutexLocker pageLocker(heap->page_lock);
1800 		for (uint32 i = 1; i < maxPages; i++) {
1801 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1802 				|| page[i].allocation_id != allocationID)
1803 				break;
1804 
1805 			minSize += heap->page_size;
1806 			maxSize += heap->page_size;
1807 		}
1808 	}
1809 
1810 	areaReadLocker.Unlock();
1811 
1812 #if KERNEL_HEAP_LEAK_CHECK
1813 	newSize += sizeof(heap_leak_check_info);
1814 #endif
1815 
1816 	// does the new allocation simply fit in the old allocation?
1817 	if (newSize > minSize && newSize <= maxSize) {
1818 #if KERNEL_HEAP_LEAK_CHECK
1819 		// update the size info (the info is at the end so stays where it is)
1820 		heap_leak_check_info *info = (heap_leak_check_info *)((addr_t)address
1821 			+ maxSize - sizeof(heap_leak_check_info));
1822 		info->size = newSize - sizeof(heap_leak_check_info);
1823 		newSize -= sizeof(heap_leak_check_info);
1824 #endif
1825 
1826 		T(Reallocate((addr_t)address, (addr_t)address, newSize));
1827 		*newAddress = address;
1828 		return B_OK;
1829 	}
1830 
1831 #if KERNEL_HEAP_LEAK_CHECK
1832 	// new leak check info will be created with the malloc below
1833 	newSize -= sizeof(heap_leak_check_info);
1834 #endif
1835 
1836 	// if not, allocate a new chunk of memory
1837 	*newAddress = memalign(0, newSize);
1838 	T(Reallocate((addr_t)address, (addr_t)*newAddress, newSize));
1839 	if (*newAddress == NULL) {
1840 		// allocation failed; the old block stays valid, *newAddress stays NULL
1841 		return B_OK;
1842 	}
1843 
1844 	// copy the old data and free the old allocation
1845 	memcpy(*newAddress, address, min_c(maxSize, newSize));
1846 	heap_free(heap, address);
1847 	return B_OK;
1848 }
1849 
1850 
1851 inline uint32
1852 heap_index_for(size_t size, int32 cpu)
1853 {
1854 #if KERNEL_HEAP_LEAK_CHECK
1855 	// take the extra info size into account
1856 	size += sizeof(heap_leak_check_info_s);
1857 #endif
1858 
1859 	uint32 index = 0;
1860 	for (; index < HEAP_CLASS_COUNT - 1; index++) {
1861 		if (size <= sHeapClasses[index].max_allocation_size)
1862 			break;
1863 	}
1864 
1865 	return (index + cpu * HEAP_CLASS_COUNT) % sHeapCount;
1866 }
1867 
1868 
1869 static void *
1870 memalign_nogrow(size_t alignment, size_t size)
1871 {
1872 	// use dedicated memory in the grow thread by default
1873 	if (thread_get_current_thread_id() == sHeapGrowThread) {
1874 		void *result = heap_memalign(sGrowHeap, alignment, size);
1875 		if (!sAddGrowHeap && heap_should_grow(sGrowHeap)) {
1876 			// hopefully the heap grower will manage to create a new heap
1877 			// before running out of private memory...
1878 			dprintf("heap: requesting new grow heap\n");
1879 			sAddGrowHeap = true;
1880 			release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
1881 		}
1882 
1883 		if (result != NULL)
1884 			return result;
1885 	}
1886 
1887 	// try public memory, there might be something available
1888 	void *result = NULL;
1889 	int32 cpuCount = MIN(smp_get_num_cpus(),
1890 		(int32)sHeapCount / HEAP_CLASS_COUNT);
1891 	int32 cpuNumber = smp_get_current_cpu();
1892 	for (int32 i = 0; i < cpuCount; i++) {
1893 		uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount);
1894 		heap_allocator *heap = sHeaps[heapIndex];
1895 		result = heap_memalign(heap, alignment, size);
1896 		if (result != NULL)
1897 			return result;
1898 	}
1899 
1900 	// no memory available
1901 	if (thread_get_current_thread_id() == sHeapGrowThread)
1902 		panic("heap: all heaps have run out of memory while growing\n");
1903 	else
1904 		dprintf("heap: all heaps have run out of memory\n");
1905 
1906 	return NULL;
1907 }
1908 
1909 
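// Creates a new kernel area of the given size and adds it to the heap as
// additional backing store.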
1910 static status_t
1911 heap_create_new_heap_area(heap_allocator *heap, const char *name, size_t size)
1912 {
1913 	void *address = NULL;
1914 	area_id heapArea = create_area(name, &address,
1915 		B_ANY_KERNEL_BLOCK_ADDRESS, size, B_FULL_LOCK,
1916 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1917 	if (heapArea < B_OK) {
1918 		TRACE(("heap: couldn't allocate heap area \"%s\"\n", name));
1919 		return heapArea;
1920 	}
1921 
1922 	heap_add_area(heap, heapArea, (addr_t)address, size);
1923 #if PARANOID_HEAP_VALIDATION
1924 	heap_validate_heap(heap);
1925 #endif
1926 	return B_OK;
1927 }
1928 
1929 
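// Kernel thread waiting on sHeapGrowSem; it adds new areas to heaps that are
// nearly full or that explicitly requested a grow, and finally wakes all
// threads blocked on sHeapGrownNotify.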
1930 static int32
1931 heap_grow_thread(void *)
1932 {
1933 	while (true) {
1934 		// wait for a request to grow the heap list
1935 		if (acquire_sem(sHeapGrowSem) < B_OK)
1936 			continue;
1937 
1938 		if (sAddGrowHeap) {
1939 			// the grow heap is going to run full soon; try to allocate a
1940 			// new one to make some room.
1941 			TRACE(("heap_grower: grow heaps will run out of memory soon\n"));
1942 			if (heap_create_new_heap_area(sGrowHeap, "additional grow heap",
1943 					HEAP_DEDICATED_GROW_SIZE) != B_OK)
1944 				dprintf("heap_grower: failed to create new grow heap area\n");
1945 		}
1946 
1947 		for (uint32 i = 0; i < sHeapCount; i++) {
1948 			heap_allocator *heap = sHeaps[i];
1949 			if (sLastGrowRequest[i] > sLastHandledGrowRequest[i]
1950 				|| heap_should_grow(heap)) {
1951 				// grow this heap if it is nearly full or if a grow was
1952 				// explicitly requested for this heap (happens when a large
1953 				// allocation cannot be fulfilled due to lack of contiguous
1954 				// pages)
1955 				if (heap_create_new_heap_area(heap, "additional heap",
1956 						HEAP_GROW_SIZE) != B_OK)
1957 					dprintf("heap_grower: failed to create new heap area\n");
1958 				sLastHandledGrowRequest[i] = sLastGrowRequest[i];
1959 			}
1960 		}
1961 
1962 		// notify anyone waiting for this request
1963 		release_sem_etc(sHeapGrownNotify, -1, B_RELEASE_ALL);
1964 	}
1965 
1966 	return 0;
1967 }
1968 
1969 
1970 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
1971 
1972 
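// Kernel daemon callback (registered in heap_init_post_thread()) that moves
// the deferred free/delete lists onto the stack while holding
// sDeferredFreeListLock and then processes them with the lock released.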
1973 static void
1974 deferred_deleter(void *arg, int iteration)
1975 {
1976 	// move entries and deletables to on-stack lists
1977 	InterruptsSpinLocker locker(sDeferredFreeListLock);
1978 	if (sDeferredFreeList.IsEmpty() && sDeferredDeletableList.IsEmpty())
1979 		return;
1980 
1981 	DeferredFreeList entries;
1982 	entries.MoveFrom(&sDeferredFreeList);
1983 
1984 	DeferredDeletableList deletables;
1985 	deletables.MoveFrom(&sDeferredDeletableList);
1986 
1987 	locker.Unlock();
1988 
1989 	// free the entries
1990 	while (DeferredFreeListEntry* entry = entries.RemoveHead())
1991 		free(entry);
1992 
1993 	// delete the deletables
1994 	while (DeferredDeletable* deletable = deletables.RemoveHead())
1995 		delete deletable;
1996 }
1997 
1998 
1999 //	#pragma mark -
2000 
2001 
2002 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
2003 
2004 
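// Splits the given range among the heap classes according to their
// initial_percentage and registers the debugger commands for inspecting the
// heaps and their allocations.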
2005 status_t
2006 heap_init(addr_t base, size_t size)
2007 {
2008 	for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) {
2009 		size_t partSize = size * sHeapClasses[i].initial_percentage / 100;
2010 		sHeaps[i] = heap_create_allocator(sHeapClasses[i].name, base, partSize,
2011 			&sHeapClasses[i], false);
2012 		sLastGrowRequest[i] = sLastHandledGrowRequest[i] = 0;
2013 		base += partSize;
2014 		sHeapCount++;
2015 	}
2016 
2017 	// set up some debug commands
2018 	add_debugger_command_etc("heap", &dump_heap_list,
2019 		"Dump infos about the kernel heap(s)",
2020 		"[(\"grow\" | \"stats\" | <heap>)]\n"
2021 		"Dump infos about the kernel heap(s). If \"grow\" is specified, only\n"
2022 		"infos about the dedicated grow heap are printed. If \"stats\" is\n"
2023 		"given as the argument, currently only the heap count is printed.\n"
2024 		"If <heap> is given, it is interpreted as the address of the heap to\n"
2025 		"print infos about.\n", 0);
2026 #if !KERNEL_HEAP_LEAK_CHECK
2027 	add_debugger_command_etc("allocations", &dump_allocations,
2028 		"Dump current heap allocations",
2029 		"[\"stats\"] [<heap>]\n"
2030 		"If no parameters are given, all current alloactions are dumped.\n"
2031 		"If the optional argument \"stats\" is specified, only the allocation\n"
2032 		"counts and no individual allocations are printed\n"
2033 		"If a specific heap address is given, only allocations of this\n"
2034 		"allocator are dumped\n", 0);
2035 #else // !KERNEL_HEAP_LEAK_CHECK
2036 	add_debugger_command_etc("allocations", &dump_allocations,
2037 		"Dump current heap allocations",
2038 		"[(\"team\" | \"thread\") <id>] [\"caller\" <address>] [\"address\" <address>] [\"stats\"]\n"
2039 		"If no parameters are given, all current alloactions are dumped.\n"
2040 		"If \"team\", \"thread\", \"caller\", and/or \"address\" is specified as the first\n"
2041 		"argument, only allocations matching the team ID, thread ID, caller\n"
2042 		"address or allocated address given in the second argument are printed.\n"
2043 		"If the optional argument \"stats\" is specified, only the allocation\n"
2044 		"counts and no individual allocations are printed\n", 0);
2045 	add_debugger_command_etc("allocations_per_caller",
2046 		&dump_allocations_per_caller,
2047 		"Dump current heap allocations summed up per caller",
2048 		"[ \"-c\" ] [ -h <heap> ]\n"
2049 		"The current allocations will by summed up by caller (their count and\n"
2050 		"size) printed in decreasing order by size or, if \"-c\" is\n"
2051 		"specified, by allocation count. If given <heap> specifies the\n"
2052 		"address of the heap for which to print the allocations.\n", 0);
2053 #endif // KERNEL_HEAP_LEAK_CHECK
2054 	return B_OK;
2055 }
2056 
2057 
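// Called once semaphores are available: blocks the address ranges around the
// 0xcccccccc/0xdeadbeef fill patterns (so that stray accesses through
// uninitialized or freed heap memory fault) and creates the semaphores used
// to communicate with the grow thread.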
2058 status_t
2059 heap_init_post_sem()
2060 {
2061 #if PARANOID_KERNEL_MALLOC
2062 	vm_block_address_range("uninitialized heap memory",
2063 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
2064 #endif
2065 #if PARANOID_KERNEL_FREE
2066 	vm_block_address_range("freed heap memory",
2067 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
2068 #endif
2069 
2070 	sHeapGrowSem = create_sem(0, "heap_grow_sem");
2071 	if (sHeapGrowSem < 0) {
2072 		panic("heap_init_post_sem(): failed to create heap grow sem\n");
2073 		return B_ERROR;
2074 	}
2075 
2076 	sHeapGrownNotify = create_sem(0, "heap_grown_notify");
2077 	if (sHeapGrownNotify < 0) {
2078 		panic("heap_init_post_sem(): failed to create heap grown notify sem\n");
2079 		return B_ERROR;
2080 	}
2081 
2082 	return B_OK;
2083 }
2084 
2085 
2086 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
2087 
2088 
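// Called once threads can be spawned: sets up the dedicated grow heap and the
// grow thread, per-CPU heaps if there is enough memory, the VIP I/O heap and
// the deferred deleter daemon.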
2089 status_t
2090 heap_init_post_thread()
2091 {
2092 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
2093 	void *address = NULL;
2094 	area_id growHeapArea = create_area("dedicated grow heap", &address,
2095 		B_ANY_KERNEL_BLOCK_ADDRESS, HEAP_DEDICATED_GROW_SIZE, B_FULL_LOCK,
2096 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2097 	if (growHeapArea < 0) {
2098 		panic("heap_init_post_thread(): couldn't allocate dedicate grow heap "
2099 			"area");
2100 		return growHeapArea;
2101 	}
2102 
2103 	sGrowHeap = heap_create_allocator("grow", (addr_t)address,
2104 		HEAP_DEDICATED_GROW_SIZE, &sHeapClasses[0], false);
2105 	if (sGrowHeap == NULL) {
2106 		panic("heap_init_post_thread(): failed to create dedicated grow heap\n");
2107 		return B_ERROR;
2108 	}
2109 
2110 	sHeapGrowThread = spawn_kernel_thread(heap_grow_thread, "heap grower",
2111 		B_URGENT_PRIORITY, NULL);
2112 	if (sHeapGrowThread < 0) {
2113 		panic("heap_init_post_thread(): cannot create heap grow thread\n");
2114 		return sHeapGrowThread;
2115 	}
2116 
2117 	// create per-cpu heaps if there's enough memory
2118 	int32 heapCount = MIN(smp_get_num_cpus(),
2119 		(int32)vm_page_num_pages() / 60 / 1024);
2120 	for (int32 i = 1; i < heapCount; i++) {
2121 		addr_t base = 0;
2122 		size_t size = HEAP_GROW_SIZE * HEAP_CLASS_COUNT;
2123 		area_id perCPUHeapArea = create_area("per cpu initial heap",
2124 			(void **)&base, B_ANY_KERNEL_ADDRESS, size, B_FULL_LOCK,
2125 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2126 		if (perCPUHeapArea < 0)
2127 			break;
2128 
2129 		for (uint32 j = 0; j < HEAP_CLASS_COUNT; j++) {
2130 			int32 heapIndex = i * HEAP_CLASS_COUNT + j;
2131 			size_t partSize = size * sHeapClasses[j].initial_percentage / 100;
2132 			sHeaps[heapIndex] = heap_create_allocator(sHeapClasses[j].name,
2133 				base, partSize, &sHeapClasses[j], false);
2134 			sLastGrowRequest[heapIndex] = 0;
2135 			sLastHandledGrowRequest[heapIndex] = 0;
2136 			base += partSize;
2137 			sHeapCount++;
2138 		}
2139 	}
2140 
2141 	// create the VIP heap
2142 	static const heap_class heapClass = {
2143 		"VIP I/O",					/* name */
2144 		100,						/* initial percentage */
2145 		B_PAGE_SIZE / 8,			/* max allocation size */
2146 		B_PAGE_SIZE,				/* page size */
2147 		8,							/* min bin size */
2148 		4,							/* bin alignment */
2149 		8,							/* min count per page */
2150 		16							/* max waste per page */
2151 	};
2152 
2153 	area_id vipHeapArea = create_area("VIP heap", &address,
2154 		B_ANY_KERNEL_ADDRESS, VIP_HEAP_SIZE, B_FULL_LOCK,
2155 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2156 	if (vipHeapArea < 0) {
2157 		panic("heap_init_post_thread(): couldn't allocate VIP heap area");
2158 		return B_ERROR;
2159 	}
2160 
2161 	sVIPHeap = heap_create_allocator("VIP heap", (addr_t)address,
2162 		VIP_HEAP_SIZE, &heapClass, false);
2163 	if (sVIPHeap == NULL) {
2164 		panic("heap_init_post_thread(): failed to create VIP heap\n");
2165 		return B_ERROR;
2166 	}
2167 
2168 	dprintf("heap_init_post_thread(): created VIP heap: %p\n", sVIPHeap);
2169 
2170 	send_signal_etc(sHeapGrowThread, SIGCONT, B_DO_NOT_RESCHEDULE);
2171 
2172 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
2173 
2174 	// run the deferred deleter roughly once a second
2175 	if (register_kernel_daemon(deferred_deleter, NULL, 10) != B_OK)
2176 		panic("heap_init_post_thread(): failed to init deferred deleter");
2177 
2178 	return B_OK;
2179 }
2180 
2181 
2182 //	#pragma mark - Public API
2183 
2184 
2185 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
2186 
2187 
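// Allocates size bytes aligned to the given alignment (0 means no special
// alignment). Allocations above HEAP_AREA_USE_THRESHOLD bypass the heaps and
// get a dedicated area instead, tagged with an area_allocation_info header so
// that free() and realloc() can recognize them later.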
2188 void *
2189 memalign(size_t alignment, size_t size)
2190 {
2191 	if (!gKernelStartup && !are_interrupts_enabled()) {
2192 		panic("memalign(): called with interrupts disabled\n");
2193 		return NULL;
2194 	}
2195 
2196 	if (!gKernelStartup && size > HEAP_AREA_USE_THRESHOLD) {
2197 		// don't even attempt such a huge allocation - use areas instead
2198 		size_t areaSize = ROUNDUP(size + sizeof(area_allocation_info)
2199 			+ alignment, B_PAGE_SIZE);
2200 		if (areaSize < size) {
2201 			// the size overflowed
2202 			return NULL;
2203 		}
2204 
2205 		void *address = NULL;
2206 		area_id allocationArea = create_area("memalign area", &address,
2207 			B_ANY_KERNEL_BLOCK_ADDRESS, areaSize, B_FULL_LOCK,
2208 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2209 		if (allocationArea < B_OK) {
2210 			dprintf("heap: failed to create area for huge allocation\n");
2211 			return NULL;
2212 		}
2213 
2214 		area_allocation_info *info = (area_allocation_info *)address;
2215 		info->magic = kAreaAllocationMagic;
2216 		info->area = allocationArea;
2217 		info->base = address;
2218 		info->size = areaSize;
2219 		info->allocation_size = size;
2220 		info->allocation_alignment = alignment;
2221 
2222 		address = (void *)((addr_t)address + sizeof(area_allocation_info));
2223 		if (alignment != 0) {
2224 			address = (void *)ROUNDUP((addr_t)address, alignment);
2225 			ASSERT((addr_t)address % alignment == 0);
2226 			ASSERT((addr_t)address + size - 1 < (addr_t)info + areaSize - 1);
2227 		}
2228 
2229 		TRACE(("heap: allocated area %ld for huge allocation of %lu bytes\n",
2230 			allocationArea, size));
2231 
2232 		info->allocation_base = address;
2233 
2234 #if PARANOID_KERNEL_MALLOC
2235 		memset(address, 0xcc, size);
2236 #endif
2237 		return address;
2238 	}
2239 
2240 	void *result = NULL;
2241 	bool shouldGrow = false;
2242 	int32 cpuCount = MIN(smp_get_num_cpus(),
2243 		(int32)sHeapCount / HEAP_CLASS_COUNT);
2244 	int32 cpuNumber = smp_get_current_cpu();
2245 	for (int32 i = 0; i < cpuCount; i++) {
2246 		uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount);
2247 		heap_allocator *heap = sHeaps[heapIndex];
2248 		result = heap_memalign(heap, alignment, size);
2249 		if (result != NULL) {
2250 			shouldGrow = heap_should_grow(heap);
2251 			break;
2252 		}
2253 
2254 #if PARANOID_HEAP_VALIDATION
2255 		heap_validate_heap(heap);
2256 #endif
2257 	}
2258 
2259 	if (result == NULL) {
2260 		// request an urgent grow and wait - we don't do it ourselves here so
2261 		// that growing is serialized through the grow thread, as otherwise
2262 		// multiple threads hitting this situation (likely when memory has run
2263 		// out) would all add areas
2264 		uint32 heapIndex = heap_index_for(size, smp_get_current_cpu());
2265 		sLastGrowRequest[heapIndex]++;
2266 		switch_sem(sHeapGrowSem, sHeapGrownNotify);
2267 
2268 		// and then try again
2269 		result = heap_memalign(sHeaps[heapIndex], alignment, size);
2270 	} else if (shouldGrow) {
2271 		// should grow sometime soon, notify the grower
2272 		release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
2273 	}
2274 
2275 	if (result == NULL)
2276 		panic("heap: kernel heap has run out of memory\n");
2277 	return result;
2278 }
2279 
2280 
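// Like memalign(), but honors allocation flags: HEAP_PRIORITY_VIP requests
// are served from the reserved VIP I/O heap, and callers that must not wait
// for memory or lock kernel space fall back to memalign_nogrow().
//
// Illustrative use from a hypothetical low memory I/O path:
//	void* buffer = memalign_etc(0, 512, HEAP_PRIORITY_VIP);
//	...
//	free_etc(buffer, HEAP_PRIORITY_VIP);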
2281 void *
2282 memalign_etc(size_t alignment, size_t size, uint32 flags)
2283 {
2284 	if ((flags & HEAP_PRIORITY_VIP) != 0)
2285 		return heap_memalign(sVIPHeap, alignment, size);
2286 
2287 	if ((flags & (HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE))
2288 			!= 0) {
2289 		return memalign_nogrow(alignment, size);
2290 	}
2291 
2292 	return memalign(alignment, size);
2293 }
2294 
2295 
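// Counterpart to memalign_etc(): passing HEAP_PRIORITY_VIP returns the block
// directly to the VIP heap instead of going through the regular free() path.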
2296 void
2297 free_etc(void *address, uint32 flags)
2298 {
2299 	if ((flags & HEAP_PRIORITY_VIP) != 0)
2300 		heap_free(sVIPHeap, address);
2301 	else
2302 		free(address);
2303 }
2304 
2305 
2306 void *
2307 malloc(size_t size)
2308 {
2309 	return memalign(0, size);
2310 }
2311 
2312 
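// Frees a block by probing the per-CPU heaps (starting with the current
// CPU's), then the dedicated grow heap, then the VIP heap, and finally by
// checking whether the address belongs to an area backed huge allocation.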
2313 void
2314 free(void *address)
2315 {
2316 	if (!gKernelStartup && !are_interrupts_enabled()) {
2317 		panic("free(): called with interrupts disabled\n");
2318 		return;
2319 	}
2320 
2321 	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2322 	for (uint32 i = 0; i < sHeapCount; i++) {
2323 		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2324 		if (heap_free(heap, address) == B_OK) {
2325 #if PARANOID_HEAP_VALIDATION
2326 			heap_validate_heap(heap);
2327 #endif
2328 			return;
2329 		}
2330 	}
2331 
2332 	// maybe it was allocated from the dedicated grow heap
2333 	if (heap_free(sGrowHeap, address) == B_OK)
2334 		return;
2335 
2336 	// or maybe it was allocated from the VIP heap
2337 	if (heap_free(sVIPHeap, address) == B_OK)
2338 		return;
2339 
2340 	// or maybe it was a huge allocation using an area
2341 	area_info areaInfo;
2342 	area_id area = area_for(address);
2343 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2344 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2345 
2346 		// just make extra sure it was allocated by us
2347 		if (info->magic == kAreaAllocationMagic && info->area == area
2348 			&& info->size == areaInfo.size && info->base == areaInfo.address
2349 			&& info->allocation_size < areaInfo.size) {
2350 			delete_area(area);
2351 			TRACE(("free(): freed huge allocation by deleting area %ld\n",
2352 				area));
2353 			return;
2354 		}
2355 	}
2356 
2357 	panic("free(): free failed for address %p\n", address);
2358 }
2359 
2360 
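// Resizes an allocation; a NULL address behaves like malloc(), a size of 0
// like free(). Heap backed blocks are reused if the new size still fits the
// old bin or page run, and area backed huge allocations are resized within
// their existing area when it has room; otherwise the data is copied into a
// fresh allocation.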
2361 void *
2362 realloc(void *address, size_t newSize)
2363 {
2364 	if (!gKernelStartup && !are_interrupts_enabled()) {
2365 		panic("realloc(): called with interrupts disabled\n");
2366 		return NULL;
2367 	}
2368 
2369 	if (address == NULL)
2370 		return memalign(0, newSize);
2371 
2372 	if (newSize == 0) {
2373 		free(address);
2374 		return NULL;
2375 	}
2376 
2377 	void *newAddress = NULL;
2378 	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2379 	for (uint32 i = 0; i < sHeapCount; i++) {
2380 		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2381 		if (heap_realloc(heap, address, &newAddress, newSize) == B_OK) {
2382 #if PARANOID_HEAP_VALIDATION
2383 			heap_validate_heap(heap);
2384 #endif
2385 			return newAddress;
2386 		}
2387 	}
2388 
2389 	// maybe it was allocated from the dedicated grow heap
2390 	if (heap_realloc(sGrowHeap, address, &newAddress, newSize) == B_OK)
2391 		return newAddress;
2392 
2393 	// or maybe it was a huge allocation using an area
2394 	area_info areaInfo;
2395 	area_id area = area_for(address);
2396 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2397 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2398 
2399 		// just make extra sure it was allocated by us
2400 		if (info->magic == kAreaAllocationMagic && info->area == area
2401 			&& info->size == areaInfo.size && info->base == areaInfo.address
2402 			&& info->allocation_size < areaInfo.size) {
2403 			size_t available = info->size - ((addr_t)info->allocation_base
2404 				- (addr_t)info->base);
2405 
2406 			if (available >= newSize) {
2407 				// there is enough room available for the newSize
2408 				TRACE(("realloc(): new size %ld fits in old area %ld with %ld "
2409 					"available\n", newSize, area, available));
2410 				info->allocation_size = newSize;
2411 				return address;
2412 			}
2413 
2414 			// have to allocate/copy/free - TODO maybe resize the area instead?
2415 			newAddress = memalign(0, newSize);
2416 			if (newAddress == NULL) {
2417 				dprintf("realloc(): failed to allocate new block of %ld bytes\n",
2418 					newSize);
2419 				return NULL;
2420 			}
2421 
2422 			memcpy(newAddress, address, min_c(newSize, info->allocation_size));
2423 			delete_area(area);
2424 			TRACE(("realloc(): allocated new block %p for size %ld and deleted "
2425 				"old area %ld\n", newAddress, newSize, area));
2426 			return newAddress;
2427 		}
2428 	}
2429 
2430 	panic("realloc(): failed to realloc address %p to size %lu\n", address,
2431 		newSize);
2432 	return NULL;
2433 }
2434 
2435 
2436 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
2437 
2438 
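// Allocates memory for numElements elements of size bytes each and returns it
// filled with zeroes, or NULL if the request cannot be satisfied.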
2439 void *
2440 calloc(size_t numElements, size_t size)
2441 {
	// fail if numElements * size would overflow size_t
	if (size != 0 && numElements > ((size_t)-1) / size)
		return NULL;

2442 	void *address = memalign(0, numElements * size);
2443 	if (address != NULL)
2444 		memset(address, 0, numElements * size);
2445 
2446 	return address;
2447 }
2448 
2449 
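// Queues a block for freeing by the deferred deleter daemon. This is meant
// for contexts in which calling free() directly is not allowed, for example
// while interrupts are disabled; the block itself is reused to hold the list
// entry.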
2450 void
2451 deferred_free(void *block)
2452 {
2453 	if (block == NULL)
2454 		return;
2455 
2456 	DeferredFreeListEntry *entry = new(block) DeferredFreeListEntry;
2457 
2458 	InterruptsSpinLocker _(sDeferredFreeListLock);
2459 	sDeferredFreeList.Add(entry);
2460 }
2461 
2462 
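// malloc_referenced() allocates size bytes preceded by a 4 byte reference
// count (initialized to 1) and returns a pointer to the data following the
// count; see malloc_referenced_acquire() and malloc_referenced_release().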
2463 void *
2464 malloc_referenced(size_t size)
2465 {
2466 	int32 *referencedData = (int32 *)malloc(size + 4);
2467 	if (referencedData == NULL)
2468 		return NULL;
2469 
2470 	*referencedData = 1;
2471 	return referencedData + 1;
2472 }
2473 
2474 
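// Adds a reference to data previously returned by malloc_referenced() and
// returns it unchanged for convenience.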
2475 void *
2476 malloc_referenced_acquire(void *data)
2477 {
2478 	if (data != NULL) {
2479 		int32 *referencedData = (int32 *)data - 1;
2480 		atomic_add(referencedData, 1);
2481 	}
2482 
2483 	return data;
2484 }
2485 
2486 
2487 void
2488 malloc_referenced_release(void *data)
2489 {
2490 	if (data == NULL)
2491 		return;
2492 
2493 	int32 *referencedData = (int32 *)data - 1;
2494 	if (atomic_add(referencedData, -1) < 1)
2495 		free(referencedData);
2496 }
2497 
2498 
2499 DeferredDeletable::~DeferredDeletable()
2500 {
2501 }
2502 
2503 
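// Queues a DeferredDeletable object for deletion by the deferred deleter
// daemon; like deferred_free() this can be used where deleting directly is
// not possible.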
2504 void
2505 deferred_delete(DeferredDeletable *deletable)
2506 {
2507 	if (deletable == NULL)
2508 		return;
2509 
2510 	InterruptsSpinLocker _(sDeferredFreeListLock);
2511 	sDeferredDeletableList.Add(deletable);
2512 }
2513