xref: /haiku/src/system/kernel/heap.cpp (revision b4e5e4982360e684c5a13d227b9a958dbe725554)
1 /*
2  * Copyright 2008-2009, Michael Lotz, mmlr@mlotz.ch.
3  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <arch/debug.h>
12 #include <debug.h>
13 #include <elf.h>
14 #include <heap.h>
15 #include <int.h>
16 #include <kernel.h>
17 #include <lock.h>
18 #include <signal.h>
19 #include <string.h>
20 #include <team.h>
21 #include <thread.h>
22 #include <tracing.h>
23 #include <util/AutoLock.h>
24 #include <vm/vm.h>
25 #include <vm/vm_page.h>
26 
27 
28 //#define TRACE_HEAP
29 #ifdef TRACE_HEAP
30 #	define TRACE(x) dprintf x
31 #else
32 #	define TRACE(x) ;
33 #endif
34 
35 
36 #if KERNEL_HEAP_LEAK_CHECK
37 typedef struct heap_leak_check_info_s {
38 	addr_t		caller;
39 	size_t		size;
40 	thread_id	thread;
41 	team_id		team;
42 } heap_leak_check_info;
43 
44 struct caller_info {
45 	addr_t		caller;
46 	uint32		count;
47 	uint32		size;
48 };
49 
50 static const int32 kCallerInfoTableSize = 1024;
51 static caller_info sCallerInfoTable[kCallerInfoTableSize];
52 static int32 sCallerInfoCount = 0;
53 #endif
54 
55 
56 typedef struct heap_page_s heap_page;
57 
58 
59 typedef struct heap_area_s {
60 	area_id			area;
61 
62 	addr_t			base;
63 	size_t			size;
64 
65 	uint32			page_count;
66 	uint32			free_page_count;
67 
68 	heap_page *		free_pages;
69 	heap_page *		page_table;
70 
71 	heap_area_s *	prev;
72 	heap_area_s *	next;
73 	heap_area_s *	all_next;
74 } heap_area;
75 
76 
77 #define MAX_BIN_COUNT	31	// depends on the size of the bin_index field
78 
79 typedef struct heap_page_s {
80 	heap_area *		area;
81 	uint16			index;
82 	uint16			bin_index : 5;
83 	uint16			free_count : 10;
84 	uint16			in_use : 1;
85 	heap_page_s *	next;
86 	heap_page_s *	prev;
87 	union {
88 		uint16			empty_index;
89 		uint16			allocation_id; // used for bin == bin_count allocations
90 	};
91 	addr_t *		free_list;
92 } heap_page;
93 
94 
95 typedef struct heap_bin_s {
96 	mutex		lock;
97 	uint32		element_size;
98 	uint16		max_free_count;
99 	heap_page *	page_list; // sorted so that the desired page is always first
100 } heap_bin;
101 
102 
103 struct heap_allocator_s {
104 	rw_lock		area_lock;
105 	mutex		page_lock;
106 
107 	const char *name;
108 	uint32		bin_count;
109 	uint32		page_size;
110 
111 	uint32		total_pages;
112 	uint32		total_free_pages;
113 	uint32		empty_areas;
114 
115 #if KERNEL_HEAP_LEAK_CHECK
116 	addr_t		(*get_caller)();
117 #endif
118 
119 	heap_bin *	bins;
120 	heap_area *	areas; // sorted so that the desired area is always first
121 	heap_area *	all_areas; // all areas including full ones
122 };
123 
124 
125 static const uint32 kAreaAllocationMagic = 'AAMG';
126 typedef struct area_allocation_info_s {
127 	area_id		area;
128 	void *		base;
129 	uint32		magic;
130 	size_t		size;
131 	size_t		allocation_size;
132 	size_t		allocation_alignment;
133 	void *		allocation_base;
134 } area_allocation_info;
135 
136 
137 struct DeferredFreeListEntry : SinglyLinkedListLinkImpl<DeferredFreeListEntry> {
138 };
139 
140 
141 typedef SinglyLinkedList<DeferredFreeListEntry> DeferredFreeList;
142 typedef SinglyLinkedList<DeferredDeletable> DeferredDeletableList;
143 
144 
145 // Heap class configuration
146 #define HEAP_CLASS_COUNT 3
147 static heap_class sHeapClasses[HEAP_CLASS_COUNT] = {
148 	{
149 		"small",					/* name */
150 		50,							/* initial percentage */
151 		B_PAGE_SIZE / 8,			/* max allocation size */
152 		B_PAGE_SIZE,				/* page size */
153 		8,							/* min bin size */
154 		4,							/* bin alignment */
155 		8,							/* min count per page */
156 		16							/* max waste per page */
157 	},
158 	{
159 		"medium",					/* name */
160 		30,							/* initial percentage */
161 		B_PAGE_SIZE * 2,			/* max allocation size */
162 		B_PAGE_SIZE * 8,			/* page size */
163 		B_PAGE_SIZE / 8,			/* min bin size */
164 		32,							/* bin alignment */
165 		4,							/* min count per page */
166 		64							/* max waste per page */
167 	},
168 	{
169 		"large",					/* name */
170 		20,							/* initial percentage */
171 		HEAP_AREA_USE_THRESHOLD,	/* max allocation size */
172 		B_PAGE_SIZE * 16,			/* page size */
173 		B_PAGE_SIZE * 2,			/* min bin size */
174 		128,						/* bin alignment */
175 		1,							/* min count per page */
176 		256							/* max waste per page */
177 	}
178 };
179 
180 
181 static uint32 sHeapCount;
182 static heap_allocator *sHeaps[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
183 static uint32 *sLastGrowRequest[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
184 static uint32 *sLastHandledGrowRequest[HEAP_CLASS_COUNT * B_MAX_CPU_COUNT];
185 
186 static heap_allocator *sGrowHeap = NULL;
187 static thread_id sHeapGrowThread = -1;
188 static sem_id sHeapGrowSem = -1;
189 static sem_id sHeapGrownNotify = -1;
190 static bool sAddGrowHeap = false;
191 static DeferredFreeList sDeferredFreeList;
192 static DeferredDeletableList sDeferredDeletableList;
193 static spinlock sDeferredFreeListLock;
194 
195 
196 
197 // #pragma mark - Tracing
198 
199 #if KERNEL_HEAP_TRACING
200 namespace KernelHeapTracing {
201 
202 class Allocate : public AbstractTraceEntry {
203 	public:
204 		Allocate(addr_t address, size_t size)
205 			:	fAddress(address),
206 				fSize(size)
207 		{
208 			Initialized();
209 		}
210 
211 		virtual void AddDump(TraceOutput &out)
212 		{
213 			out.Print("heap allocate: 0x%08lx (%lu bytes)", fAddress, fSize);
214 		}
215 
216 	private:
217 		addr_t	fAddress;
218 		size_t	fSize;
219 };
220 
221 
222 class Reallocate : public AbstractTraceEntry {
223 	public:
224 		Reallocate(addr_t oldAddress, addr_t newAddress, size_t newSize)
225 			:	fOldAddress(oldAddress),
226 				fNewAddress(newAddress),
227 				fNewSize(newSize)
228 		{
229 			Initialized();
230 		};
231 
232 		virtual void AddDump(TraceOutput &out)
233 		{
234 			out.Print("heap reallocate: 0x%08lx -> 0x%08lx (%lu bytes)",
235 				fOldAddress, fNewAddress, fNewSize);
236 		}
237 
238 	private:
239 		addr_t	fOldAddress;
240 		addr_t	fNewAddress;
241 		size_t	fNewSize;
242 };
243 
244 
245 class Free : public AbstractTraceEntry {
246 	public:
247 		Free(addr_t address)
248 			:	fAddress(address)
249 		{
250 			Initialized();
251 		};
252 
253 		virtual void AddDump(TraceOutput &out)
254 		{
255 			out.Print("heap free: 0x%08lx", fAddress);
256 		}
257 
258 	private:
259 		addr_t	fAddress;
260 };
261 
262 
263 } // namespace KernelHeapTracing
264 
265 #	define T(x)	if (!gKernelStartup) new(std::nothrow) KernelHeapTracing::x;
266 #else
267 #	define T(x)	;
268 #endif
269 
270 
271 // #pragma mark - Debug functions
272 
273 
274 #if KERNEL_HEAP_LEAK_CHECK
275 static addr_t
276 get_caller()
277 {
278 	// Find the first return address outside of the allocator code. Note that
279 	// this makes certain assumptions about how the code for the functions
280 	// ends up in the kernel object.
281 	addr_t returnAddresses[5];
282 	int32 depth = arch_debug_get_stack_trace(returnAddresses, 5, 0, 1,
283 		STACK_TRACE_KERNEL);
284 	for (int32 i = 0; i < depth; i++) {
285 		if (returnAddresses[i] < (addr_t)&get_caller
286 			|| returnAddresses[i] > (addr_t)&malloc_referenced_release) {
287 			return returnAddresses[i];
288 		}
289 	}
290 
291 	return 0;
292 }
293 #endif
294 
295 
296 static void
297 dump_page(heap_page *page)
298 {
299 	uint32 count = 0;
300 	for (addr_t *temp = page->free_list; temp != NULL; temp = (addr_t *)*temp)
301 		count++;
302 
303 	kprintf("\t\tpage %p: bin_index: %u; free_count: %u; empty_index: %u; "
304 		"free_list %p (%lu entr%s)\n", page, page->bin_index, page->free_count,
305 		page->empty_index, page->free_list, count, count == 1 ? "y" : "ies");
306 }
307 
308 
309 static void
310 dump_bin(heap_bin *bin)
311 {
312 	uint32 count = 0;
313 	for (heap_page *page = bin->page_list; page != NULL; page = page->next)
314 		count++;
315 
316 	kprintf("\telement_size: %lu; max_free_count: %u; page_list %p (%lu pages"
317 		");\n", bin->element_size, bin->max_free_count, bin->page_list, count);
318 
319 	for (heap_page *page = bin->page_list; page != NULL; page = page->next)
320 		dump_page(page);
321 }
322 
323 
324 static void
325 dump_bin_list(heap_allocator *heap)
326 {
327 	for (uint32 i = 0; i < heap->bin_count; i++)
328 		dump_bin(&heap->bins[i]);
329 	kprintf("\n");
330 }
331 
332 
333 static void
334 dump_allocator_areas(heap_allocator *heap)
335 {
336 	heap_area *area = heap->all_areas;
337 	while (area) {
338 		kprintf("\tarea %p: area: %ld; base: 0x%08lx; size: %lu; page_count: "
339 			"%lu; free_pages: %p (%lu entr%s)\n", area, area->area, area->base,
340 			area->size, area->page_count, area->free_pages,
341 			area->free_page_count, area->free_page_count == 1 ? "y" : "ies");
342 		area = area->all_next;
343 	}
344 
345 	kprintf("\n");
346 }
347 
348 
349 static void
350 dump_allocator(heap_allocator *heap, bool areas, bool bins)
351 {
352 	kprintf("allocator %p: name: %s; page_size: %lu; bin_count: %lu; pages: "
353 		"%lu; free_pages: %lu; empty_areas: %lu\n", heap, heap->name,
354 		heap->page_size, heap->bin_count, heap->total_pages,
355 		heap->total_free_pages, heap->empty_areas);
356 
357 	if (areas)
358 		dump_allocator_areas(heap);
359 	if (bins)
360 		dump_bin_list(heap);
361 }
362 
363 
364 static int
365 dump_heap_list(int argc, char **argv)
366 {
367 	if (argc == 2 && strcmp(argv[1], "grow") == 0) {
368 		// only dump dedicated grow heap info
369 		kprintf("dedicated grow heap:\n");
370 		dump_allocator(sGrowHeap, true, true);
371 		return 0;
372 	}
373 
374 	bool stats = false;
375 	int i = 1;
376 
377 	if (argc > 1 && strcmp(argv[1], "stats") == 0) {
378 		stats = true;
379 		i++;
380 	}
381 
382 	uint64 heapAddress = 0;
383 	if (i < argc && !evaluate_debug_expression(argv[i], &heapAddress, true)) {
384 		print_debugger_command_usage(argv[0]);
385 		return 0;
386 	}
387 
388 	if (heapAddress == 0) {
389 		// dump default kernel heaps
390 		for (uint32 i = 0; i < sHeapCount; i++)
391 			dump_allocator(sHeaps[i], !stats, !stats);
392 	} else {
393 		// dump specified heap
394 		dump_allocator((heap_allocator*)(addr_t)heapAddress, !stats, !stats);
395 	}
396 
397 	return 0;
398 }
399 
400 
401 #if !KERNEL_HEAP_LEAK_CHECK
402 
403 static int
404 dump_allocations(int argc, char **argv)
405 {
406 	uint64 heapAddress = 0;
407 	bool statsOnly = false;
408 	for (int32 i = 1; i < argc; i++) {
409 		if (strcmp(argv[i], "stats") == 0)
410 			statsOnly = true;
411 		else if (!evaluate_debug_expression(argv[i], &heapAddress, true)) {
412 			print_debugger_command_usage(argv[0]);
413 			return 0;
414 		}
415 	}
416 
417 	size_t totalSize = 0;
418 	uint32 totalCount = 0;
419 	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
420 		heap_allocator *heap = sHeaps[heapIndex];
421 		if (heapAddress != 0)
422 			heap = (heap_allocator *)(addr_t)heapAddress;
423 
424 		// go through all the pages in all the areas
425 		heap_area *area = heap->all_areas;
426 		while (area) {
427 			for (uint32 i = 0; i < area->page_count; i++) {
428 				heap_page *page = &area->page_table[i];
429 				if (!page->in_use)
430 					continue;
431 
432 				addr_t base = area->base + i * heap->page_size;
433 				if (page->bin_index < heap->bin_count) {
434 					// page is used by a small allocation bin
435 					uint32 elementCount = page->empty_index;
436 					size_t elementSize
437 						= heap->bins[page->bin_index].element_size;
438 					for (uint32 j = 0; j < elementCount;
439 							j++, base += elementSize) {
440 						// walk the free list to see if this element is in use
441 						bool elementInUse = true;
442 						for (addr_t *temp = page->free_list; temp != NULL;
443 								temp = (addr_t *)*temp) {
444 							if ((addr_t)temp == base) {
445 								elementInUse = false;
446 								break;
447 							}
448 						}
449 
450 						if (!elementInUse)
451 							continue;
452 
453 						if (!statsOnly) {
454 							kprintf("address: 0x%08lx; size: %lu bytes\n",
455 								base, elementSize);
456 						}
457 
458 						totalSize += elementSize;
459 						totalCount++;
460 					}
461 				} else {
462 					// page is used by a big allocation, find the page count
463 					uint32 pageCount = 1;
464 					while (i + pageCount < area->page_count
465 						&& area->page_table[i + pageCount].in_use
466 						&& area->page_table[i + pageCount].bin_index
467 							== heap->bin_count
468 						&& area->page_table[i + pageCount].allocation_id
469 							== page->allocation_id)
470 						pageCount++;
471 
472 					size_t size = pageCount * heap->page_size;
473 
474 					if (!statsOnly) {
475 						kprintf("address: 0x%08lx; size: %lu bytes\n", base,
476 							size);
477 					}
478 
479 					totalSize += size;
480 					totalCount++;
481 
482 					// skip the allocated pages
483 					i += pageCount - 1;
484 				}
485 			}
486 
487 			area = area->all_next;
488 		}
489 
490 		if (heapAddress != 0)
491 			break;
492 	}
493 
494 	kprintf("total allocations: %lu; total bytes: %lu\n", totalCount, totalSize);
495 	return 0;
496 }
497 
498 #else // !KERNEL_HEAP_LEAK_CHECK
499 
500 static int
501 dump_allocations(int argc, char **argv)
502 {
503 	team_id team = -1;
504 	thread_id thread = -1;
505 	addr_t caller = 0;
506 	addr_t address = 0;
507 	bool statsOnly = false;
508 
509 	for (int32 i = 1; i < argc; i++) {
510 		if (strcmp(argv[i], "team") == 0)
511 			team = parse_expression(argv[++i]);
512 		else if (strcmp(argv[i], "thread") == 0)
513 			thread = parse_expression(argv[++i]);
514 		else if (strcmp(argv[i], "caller") == 0)
515 			caller = parse_expression(argv[++i]);
516 		else if (strcmp(argv[i], "address") == 0)
517 			address = parse_expression(argv[++i]);
518 		else if (strcmp(argv[i], "stats") == 0)
519 			statsOnly = true;
520 		else {
521 			print_debugger_command_usage(argv[0]);
522 			return 0;
523 		}
524 	}
525 
526 	size_t totalSize = 0;
527 	uint32 totalCount = 0;
528 	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
529 		heap_allocator *heap = sHeaps[heapIndex];
530 
531 		// go through all the pages in all the areas
532 		heap_area *area = heap->all_areas;
533 		while (area) {
534 			heap_leak_check_info *info = NULL;
535 			for (uint32 i = 0; i < area->page_count; i++) {
536 				heap_page *page = &area->page_table[i];
537 				if (!page->in_use)
538 					continue;
539 
540 				addr_t base = area->base + i * heap->page_size;
541 				if (page->bin_index < heap->bin_count) {
542 					// page is used by a small allocation bin
543 					uint32 elementCount = page->empty_index;
544 					size_t elementSize
545 						= heap->bins[page->bin_index].element_size;
546 					for (uint32 j = 0; j < elementCount;
547 							j++, base += elementSize) {
548 						// walk the free list to see if this element is in use
549 						bool elementInUse = true;
550 						for (addr_t *temp = page->free_list; temp != NULL;
551 								temp = (addr_t *)*temp) {
552 							if ((addr_t)temp == base) {
553 								elementInUse = false;
554 								break;
555 							}
556 						}
557 
558 						if (!elementInUse)
559 							continue;
560 
561 						info = (heap_leak_check_info *)(base + elementSize
562 							- sizeof(heap_leak_check_info));
563 
564 						if ((team == -1 || info->team == team)
565 							&& (thread == -1 || info->thread == thread)
566 							&& (caller == 0 || info->caller == caller)
567 							&& (address == 0 || base == address)) {
568 							// interesting...
569 							if (!statsOnly) {
570 								kprintf("team: % 6ld; thread: % 6ld; "
571 									"address: 0x%08lx; size: %lu bytes; "
572 									"caller: %#lx\n", info->team, info->thread,
573 									base, info->size, info->caller);
574 							}
575 
576 							totalSize += info->size;
577 							totalCount++;
578 						}
579 					}
580 				} else {
581 					// page is used by a big allocation, find the page count
582 					uint32 pageCount = 1;
583 					while (i + pageCount < area->page_count
584 						&& area->page_table[i + pageCount].in_use
585 						&& area->page_table[i + pageCount].bin_index
586 							== heap->bin_count
587 						&& area->page_table[i + pageCount].allocation_id
588 							== page->allocation_id)
589 						pageCount++;
590 
591 					info = (heap_leak_check_info *)(base + pageCount
592 						* heap->page_size - sizeof(heap_leak_check_info));
593 
594 					if ((team == -1 || info->team == team)
595 						&& (thread == -1 || info->thread == thread)
596 						&& (caller == 0 || info->caller == caller)
597 						&& (address == 0 || base == address)) {
598 						// interesting...
599 						if (!statsOnly) {
600 							kprintf("team: % 6ld; thread: % 6ld;"
601 								" address: 0x%08lx; size: %lu bytes;"
602 								" caller: %#lx\n", info->team, info->thread,
603 								base, info->size, info->caller);
604 						}
605 
606 						totalSize += info->size;
607 						totalCount++;
608 					}
609 
610 					// skip the allocated pages
611 					i += pageCount - 1;
612 				}
613 			}
614 
615 			area = area->all_next;
616 		}
617 	}
618 
619 	kprintf("total allocations: %lu; total bytes: %lu\n", totalCount,
620 		totalSize);
621 	return 0;
622 }
623 
624 
625 static caller_info*
626 get_caller_info(addr_t caller)
627 {
628 	// find the caller info
629 	for (int32 i = 0; i < sCallerInfoCount; i++) {
630 		if (caller == sCallerInfoTable[i].caller)
631 			return &sCallerInfoTable[i];
632 	}
633 
634 	// not found, add a new entry, if there are free slots
635 	if (sCallerInfoCount >= kCallerInfoTableSize)
636 		return NULL;
637 
638 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
639 	info->caller = caller;
640 	info->count = 0;
641 	info->size = 0;
642 
643 	return info;
644 }
645 
646 
647 static int
648 caller_info_compare_size(const void* _a, const void* _b)
649 {
650 	const caller_info* a = (const caller_info*)_a;
651 	const caller_info* b = (const caller_info*)_b;
652 	return (int)(b->size - a->size);
653 }
654 
655 
656 static int
657 caller_info_compare_count(const void* _a, const void* _b)
658 {
659 	const caller_info* a = (const caller_info*)_a;
660 	const caller_info* b = (const caller_info*)_b;
661 	return (int)(b->count - a->count);
662 }
663 
664 
665 static bool
666 analyze_allocation_callers(heap_allocator *heap)
667 {
668 	// go through all the pages in all the areas
669 	heap_area *area = heap->all_areas;
670 	while (area) {
671 		heap_leak_check_info *info = NULL;
672 		for (uint32 i = 0; i < area->page_count; i++) {
673 			heap_page *page = &area->page_table[i];
674 			if (!page->in_use)
675 				continue;
676 
677 			addr_t base = area->base + i * heap->page_size;
678 			if (page->bin_index < heap->bin_count) {
679 				// page is used by a small allocation bin
680 				uint32 elementCount = page->empty_index;
681 				size_t elementSize = heap->bins[page->bin_index].element_size;
682 				for (uint32 j = 0; j < elementCount; j++, base += elementSize) {
683 					// walk the free list to see if this element is in use
684 					bool elementInUse = true;
685 					for (addr_t *temp = page->free_list; temp != NULL;
686 						temp = (addr_t *)*temp) {
687 						if ((addr_t)temp == base) {
688 							elementInUse = false;
689 							break;
690 						}
691 					}
692 
693 					if (!elementInUse)
694 						continue;
695 
696 					info = (heap_leak_check_info *)(base + elementSize
697 						- sizeof(heap_leak_check_info));
698 
699 					caller_info *callerInfo = get_caller_info(info->caller);
700 					if (callerInfo == NULL) {
701 						kprintf("out of space for caller infos\n");
702 						return false;
703 					}
704 
705 					callerInfo->count++;
706 					callerInfo->size += info->size;
707 				}
708 			} else {
709 				// page is used by a big allocation, find the page count
710 				uint32 pageCount = 1;
711 				while (i + pageCount < area->page_count
712 					&& area->page_table[i + pageCount].in_use
713 					&& area->page_table[i + pageCount].bin_index
714 						== heap->bin_count
715 					&& area->page_table[i + pageCount].allocation_id
716 						== page->allocation_id) {
717 					pageCount++;
718 				}
719 
720 				info = (heap_leak_check_info *)(base + pageCount
721 					* heap->page_size - sizeof(heap_leak_check_info));
722 
723 				caller_info *callerInfo = get_caller_info(info->caller);
724 				if (callerInfo == NULL) {
725 					kprintf("out of space for caller infos\n");
726 					return false;
727 				}
728 
729 				callerInfo->count++;
730 				callerInfo->size += info->size;
731 
732 				// skip the allocated pages
733 				i += pageCount - 1;
734 			}
735 		}
736 
737 		area = area->all_next;
738 	}
739 
740 	return true;
741 }
742 
743 
744 static int
745 dump_allocations_per_caller(int argc, char **argv)
746 {
747 	bool sortBySize = true;
748 	heap_allocator *heap = NULL;
749 
750 	for (int32 i = 1; i < argc; i++) {
751 		if (strcmp(argv[i], "-c") == 0) {
752 			sortBySize = false;
753 		} else if (strcmp(argv[i], "-h") == 0) {
754 			uint64 heapAddress;
755 			if (++i >= argc
756 				|| !evaluate_debug_expression(argv[i], &heapAddress, true)) {
757 				print_debugger_command_usage(argv[0]);
758 				return 0;
759 			}
760 
761 			heap = (heap_allocator*)(addr_t)heapAddress;
762 		} else {
763 			print_debugger_command_usage(argv[0]);
764 			return 0;
765 		}
766 	}
767 
768 	sCallerInfoCount = 0;
769 
770 	if (heap != NULL) {
771 		if (!analyze_allocation_callers(heap))
772 			return 0;
773 	} else {
774 		for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
775 			if (!analyze_allocation_callers(sHeaps[heapIndex]))
776 				return 0;
777 		}
778 	}
779 
780 	// sort the array
781 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
782 		sortBySize ? &caller_info_compare_size : &caller_info_compare_count);
783 
784 	kprintf("%ld different callers, sorted by %s...\n\n", sCallerInfoCount,
785 		sortBySize ? "size" : "count");
786 
787 	kprintf("     count        size      caller\n");
788 	kprintf("----------------------------------\n");
789 	for (int32 i = 0; i < sCallerInfoCount; i++) {
790 		caller_info& info = sCallerInfoTable[i];
791 		kprintf("%10ld  %10ld  %#08lx", info.count, info.size, info.caller);
792 
793 		const char *symbol;
794 		const char *imageName;
795 		bool exactMatch;
796 		addr_t baseAddress;
797 
798 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
799 				&imageName, &exactMatch) == B_OK) {
800 			kprintf("  %s + 0x%lx (%s)%s\n", symbol,
801 				info.caller - baseAddress, imageName,
802 				exactMatch ? "" : " (nearest)");
803 		} else
804 			kprintf("\n");
805 	}
806 
807 	return 0;
808 }
809 
810 #endif // KERNEL_HEAP_LEAK_CHECK
811 
812 
813 #if PARANOID_HEAP_VALIDATION
814 static void
815 heap_validate_heap(heap_allocator *heap)
816 {
817 	ReadLocker areaReadLocker(heap->area_lock);
818 	for (uint32 i = 0; i < heap->bin_count; i++)
819 		mutex_lock(&heap->bins[i].lock);
820 	MutexLocker pageLocker(heap->page_lock);
821 
822 	uint32 totalPageCount = 0;
823 	uint32 totalFreePageCount = 0;
824 	heap_area *area = heap->all_areas;
825 	while (area != NULL) {
826 		// validate the free pages list
827 		uint32 freePageCount = 0;
828 		heap_page *lastPage = NULL;
829 		heap_page *page = area->free_pages;
830 		while (page) {
831 			if ((addr_t)page < (addr_t)&area->page_table[0]
832 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
833 				panic("free page is not part of the page table\n");
834 
835 			if (page->index >= area->page_count)
836 				panic("free page has invalid index\n");
837 
838 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
839 				panic("free page index does not lead to target page\n");
840 
841 			if (page->prev != lastPage)
842 				panic("free page entry has invalid prev link\n");
843 
844 			if (page->in_use)
845 				panic("free page marked as in use\n");
846 
847 			lastPage = page;
848 			page = page->next;
849 			freePageCount++;
850 		}
851 
852 		totalPageCount += freePageCount;
853 		totalFreePageCount += freePageCount;
854 		if (area->free_page_count != freePageCount)
855 			panic("free page count doesn't match free page list\n");
856 
857 		// validate the page table
858 		uint32 usedPageCount = 0;
859 		for (uint32 i = 0; i < area->page_count; i++) {
860 			if (area->page_table[i].in_use)
861 				usedPageCount++;
862 		}
863 
864 		totalPageCount += usedPageCount;
865 		if (freePageCount + usedPageCount != area->page_count) {
866 			panic("free pages and used pages do not add up (%lu + %lu != %lu)\n",
867 				freePageCount, usedPageCount, area->page_count);
868 		}
869 
870 		area = area->all_next;
871 	}
872 
873 	// validate the areas
874 	area = heap->areas;
875 	heap_area *lastArea = NULL;
876 	uint32 lastFreeCount = 0;
877 	while (area != NULL) {
878 		if (area->free_page_count < lastFreeCount)
879 			panic("size ordering of area list broken\n");
880 
881 		if (area->prev != lastArea)
882 			panic("area list entry has invalid prev link\n");
883 
884 		lastArea = area;
885 		lastFreeCount = area->free_page_count;
886 		area = area->next;
887 	}
888 
889 	lastArea = NULL;
890 	area = heap->all_areas;
891 	while (area != NULL) {
892 		if (lastArea != NULL && lastArea->base < area->base)
893 			panic("base ordering of all_areas list broken\n");
894 
895 		lastArea = area;
896 		area = area->all_next;
897 	}
898 
899 	// validate the bins
900 	for (uint32 i = 0; i < heap->bin_count; i++) {
901 		heap_bin *bin = &heap->bins[i];
902 		heap_page *lastPage = NULL;
903 		heap_page *page = bin->page_list;
904 		lastFreeCount = 0;
905 		while (page) {
906 			area = heap->all_areas;
907 			while (area) {
908 				if (area == page->area)
909 					break;
910 				area = area->all_next;
911 			}
912 
913 			if (area == NULL) {
914 				panic("page area not present in area list\n");
915 				page = page->next;
916 				continue;
917 			}
918 
919 			if ((addr_t)page < (addr_t)&area->page_table[0]
920 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
921 				panic("used page is not part of the page table\n");
922 
923 			if (page->index >= area->page_count)
924 				panic("used page has invalid index\n");
925 
926 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
927 				panic("used page index does not lead to target page\n");
928 
929 			if (page->prev != lastPage) {
930 				panic("used page entry has invalid prev link (%p vs %p bin "
931 					"%lu)\n", page->prev, lastPage, i);
932 			}
933 
934 			if (!page->in_use)
935 				panic("used page marked as not in use\n");
936 
937 			if (page->bin_index != i) {
938 				panic("used page with bin index %u in page list of bin %lu\n",
939 					page->bin_index, i);
940 			}
941 
942 			if (page->free_count < lastFreeCount)
943 				panic("ordering of bin page list broken\n");
944 
945 			// validate the free list
946 			uint32 freeSlotsCount = 0;
947 			addr_t *element = page->free_list;
948 			addr_t pageBase = area->base + page->index * heap->page_size;
949 			while (element) {
950 				if ((addr_t)element < pageBase
951 					|| (addr_t)element >= pageBase + heap->page_size)
952 					panic("free list entry out of page range\n");
953 
954 				if (((addr_t)element - pageBase) % bin->element_size != 0)
955 					panic("free list entry not on an element boundary\n");
956 
957 				element = (addr_t *)*element;
958 				freeSlotsCount++;
959 			}
960 
961 			uint32 slotCount = bin->max_free_count;
962 			if (page->empty_index > slotCount) {
963 				panic("empty index beyond slot count (%u with %lu slots)\n",
964 					page->empty_index, slotCount);
965 			}
966 
967 			freeSlotsCount += (slotCount - page->empty_index);
968 			if (freeSlotsCount > slotCount)
969 				panic("more free slots than fit into the page\n");
970 
971 			lastPage = page;
972 			lastFreeCount = page->free_count;
973 			page = page->next;
974 		}
975 	}
976 
977 	pageLocker.Unlock();
978 	for (uint32 i = 0; i < heap->bin_count; i++)
979 		mutex_unlock(&heap->bins[i].lock);
980 	areaReadLocker.Unlock();
981 }
982 #endif // PARANOID_HEAP_VALIDATION
983 
984 
985 // #pragma mark - Heap functions
986 
987 
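// Adds the memory range [base, base + size) to the given heap as a new area.
// The heap_area header and the page table are placed at the start of the
// range; the rest becomes allocatable pages. The area is linked into the
// heap's area list and the base-ordered all_areas list, and the heap's page
// counters are updated. An area backed by a real kernel area (areaID >= 0)
// starts out counted as empty and therefore deletable.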
988 void
989 heap_add_area(heap_allocator *heap, area_id areaID, addr_t base, size_t size)
990 {
991 	heap_area *area = (heap_area *)base;
992 	area->area = areaID;
993 
994 	base += sizeof(heap_area);
995 	size -= sizeof(heap_area);
996 
997 	uint32 pageCount = size / heap->page_size;
998 	size_t pageTableSize = pageCount * sizeof(heap_page);
999 	area->page_table = (heap_page *)base;
1000 	base += pageTableSize;
1001 	size -= pageTableSize;
1002 
1003 	// the rest is now actually usable memory (rounded to the next page)
1004 	area->base = ROUNDUP(base, B_PAGE_SIZE);
1005 	area->size = size & ~(B_PAGE_SIZE - 1);
1006 
1007 	// now we know the real page count
1008 	pageCount = area->size / heap->page_size;
1009 	area->page_count = pageCount;
1010 
1011 	// zero out the page table and fill in page indexes
1012 	memset((void *)area->page_table, 0, pageTableSize);
1013 	for (uint32 i = 0; i < pageCount; i++) {
1014 		area->page_table[i].area = area;
1015 		area->page_table[i].index = i;
1016 	}
1017 
1018 	// add all pages to the free pages list
1019 	for (uint32 i = 1; i < pageCount; i++) {
1020 		area->page_table[i - 1].next = &area->page_table[i];
1021 		area->page_table[i].prev = &area->page_table[i - 1];
1022 	}
1023 	area->free_pages = &area->page_table[0];
1024 	area->free_page_count = pageCount;
1025 	area->page_table[0].prev = NULL;
1026 	area->next = NULL;
1027 
1028 	WriteLocker areaWriteLocker(heap->area_lock);
1029 	MutexLocker pageLocker(heap->page_lock);
1030 	if (heap->areas == NULL) {
1031 		// it's the only (empty) area in that heap
1032 		area->prev = NULL;
1033 		heap->areas = area;
1034 	} else {
1035 		// link in this area as the last one as it is completely empty
1036 		heap_area *lastArea = heap->areas;
1037 		while (lastArea->next != NULL)
1038 			lastArea = lastArea->next;
1039 
1040 		lastArea->next = area;
1041 		area->prev = lastArea;
1042 	}
1043 
1044 	// insert this area in the all_areas list so it stays ordered by base
1045 	if (heap->all_areas == NULL || heap->all_areas->base < area->base) {
1046 		area->all_next = heap->all_areas;
1047 		heap->all_areas = area;
1048 	} else {
1049 		heap_area *insert = heap->all_areas;
1050 		while (insert->all_next && insert->all_next->base > area->base)
1051 			insert = insert->all_next;
1052 
1053 		area->all_next = insert->all_next;
1054 		insert->all_next = area;
1055 	}
1056 
1057 	heap->total_pages += area->page_count;
1058 	heap->total_free_pages += area->free_page_count;
1059 
1060 	if (areaID >= 0) {
1061 		// this deletable area is still completely empty - the empty count will
1062 		// be decremented as soon as this area is used for the first time
1063 		heap->empty_areas++;
1064 	}
1065 
1066 	pageLocker.Unlock();
1067 	areaWriteLocker.Unlock();
1068 
1069 	dprintf("heap_add_area: area %ld added to %s heap %p - usable range 0x%08lx "
1070 		"- 0x%08lx\n", area->area, heap->name, heap, area->base,
1071 		area->base + area->size);
1072 }
1073 
1074 
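// Unlinks a completely unused area from the heap's area lists and adjusts the
// page counters. Panics (and returns B_ERROR) if the area still has pages in
// use or if it is the last non-full area of the heap. Deleting the underlying
// kernel area is left to the caller.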
1075 static status_t
1076 heap_remove_area(heap_allocator *heap, heap_area *area)
1077 {
1078 	if (area->free_page_count != area->page_count) {
1079 		panic("tried removing heap area that still has pages in use");
1080 		return B_ERROR;
1081 	}
1082 
1083 	if (area->prev == NULL && area->next == NULL) {
1084 		panic("tried removing the last non-full heap area");
1085 		return B_ERROR;
1086 	}
1087 
1088 	if (heap->areas == area)
1089 		heap->areas = area->next;
1090 	if (area->prev != NULL)
1091 		area->prev->next = area->next;
1092 	if (area->next != NULL)
1093 		area->next->prev = area->prev;
1094 
1095 	if (heap->all_areas == area)
1096 		heap->all_areas = area->all_next;
1097 	else {
1098 		heap_area *previous = heap->all_areas;
1099 		while (previous) {
1100 			if (previous->all_next == area) {
1101 				previous->all_next = area->all_next;
1102 				break;
1103 			}
1104 
1105 			previous = previous->all_next;
1106 		}
1107 
1108 		if (previous == NULL)
1109 			panic("removing heap area that is not in the all_areas list");
1110 	}
1111 
1112 	heap->total_pages -= area->page_count;
1113 	heap->total_free_pages -= area->free_page_count;
1114 
1115 	dprintf("heap_remove_area: area %ld with range 0x%08lx - 0x%08lx removed "
1116 		"from %s heap %p\n", area->area, area->base, area->base + area->size,
1117 		heap->name, heap);
1118 
1119 	return B_OK;
1120 }
1121 
1122 
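// Sets up a heap_allocator for the given memory range according to the
// supplied heap_class. The allocator structure and its bins are either
// malloc'ed (allocateOnHeap) or carved out of the beginning of the range.
// Bin element sizes are derived from the class' page size, minimum bin size,
// alignment and waste limits; the remaining memory is handed to
// heap_add_area().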
1123 heap_allocator *
1124 heap_create_allocator(const char *name, addr_t base, size_t size,
1125 	const heap_class *heapClass, bool allocateOnHeap)
1126 {
1127 	heap_allocator *heap;
1128 	if (allocateOnHeap) {
1129 		// allocate separately on the heap
1130 		heap = (heap_allocator *)malloc(sizeof(heap_allocator)
1131 			+ sizeof(heap_bin) * MAX_BIN_COUNT);
1132 	} else {
1133 		// use up the first part of the area
1134 		heap = (heap_allocator *)base;
1135 		base += sizeof(heap_allocator);
1136 		size -= sizeof(heap_allocator);
1137 	}
1138 
1139 	heap->name = name;
1140 	heap->page_size = heapClass->page_size;
1141 	heap->total_pages = heap->total_free_pages = heap->empty_areas = 0;
1142 	heap->areas = heap->all_areas = NULL;
1143 	heap->bins = (heap_bin *)((addr_t)heap + sizeof(heap_allocator));
1144 
1145 #if KERNEL_HEAP_LEAK_CHECK
1146 	heap->get_caller = &get_caller;
1147 #endif
1148 
1149 	heap->bin_count = 0;
1150 	size_t binSize = 0, lastSize = 0;
1151 	uint32 count = heap->page_size / heapClass->min_bin_size;
1152 	for (; count >= heapClass->min_count_per_page; count--, lastSize = binSize) {
1153 		if (heap->bin_count >= MAX_BIN_COUNT)
1154 			panic("heap configuration invalid - max bin count reached\n");
1155 
1156 		binSize = (heap->page_size / count) & ~(heapClass->bin_alignment - 1);
1157 		if (binSize == lastSize)
1158 			continue;
1159 		if (heap->page_size - count * binSize > heapClass->max_waste_per_page)
1160 			continue;
1161 
1162 		heap_bin *bin = &heap->bins[heap->bin_count];
1163 		mutex_init(&bin->lock, "heap bin lock");
1164 		bin->element_size = binSize;
1165 		bin->max_free_count = heap->page_size / binSize;
1166 		bin->page_list = NULL;
1167 		heap->bin_count++;
1168 	}
1169 
1170 	if (!allocateOnHeap) {
1171 		base += heap->bin_count * sizeof(heap_bin);
1172 		size -= heap->bin_count * sizeof(heap_bin);
1173 	}
1174 
1175 	rw_lock_init(&heap->area_lock, "heap area rw lock");
1176 	mutex_init(&heap->page_lock, "heap page lock");
1177 
1178 	heap_add_area(heap, -1, base, size);
1179 	return heap;
1180 }
1181 
1182 
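// Bookkeeping for pageCount pages that became free in the given area. A
// previously full area is put back into the heap's area list; otherwise the
// area may be moved back so the list stays ordered by ascending free page
// count. An area that is now completely empty (and backed by a real kernel
// area) increases the heap's empty_areas count.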
1183 static inline void
1184 heap_free_pages_added(heap_allocator *heap, heap_area *area, uint32 pageCount)
1185 {
1186 	area->free_page_count += pageCount;
1187 	heap->total_free_pages += pageCount;
1188 
1189 	if (area->free_page_count == pageCount) {
1190 		// we need to add ourselves to the area list of the heap
1191 		area->prev = NULL;
1192 		area->next = heap->areas;
1193 		if (area->next)
1194 			area->next->prev = area;
1195 		heap->areas = area;
1196 	} else {
1197 		// we might need to move back in the area list
1198 		if (area->next && area->next->free_page_count < area->free_page_count) {
1199 			// move ourselves so the list stays ordered
1200 			heap_area *insert = area->next;
1201 			while (insert->next
1202 				&& insert->next->free_page_count < area->free_page_count)
1203 				insert = insert->next;
1204 
1205 			if (area->prev)
1206 				area->prev->next = area->next;
1207 			if (area->next)
1208 				area->next->prev = area->prev;
1209 			if (heap->areas == area)
1210 				heap->areas = area->next;
1211 
1212 			area->prev = insert;
1213 			area->next = insert->next;
1214 			if (area->next)
1215 				area->next->prev = area;
1216 			insert->next = area;
1217 		}
1218 	}
1219 
1220 	if (area->free_page_count == area->page_count && area->area >= 0)
1221 		heap->empty_areas++;
1222 }
1223 
1224 
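// Bookkeeping for pageCount pages that were taken from the given area. An
// area that was completely empty (and backed by a real kernel area) loses its
// empty status. A now full area is removed from the heap's area list;
// otherwise the area may be moved forward so the list stays ordered by
// ascending free page count.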
1225 static inline void
1226 heap_free_pages_removed(heap_allocator *heap, heap_area *area, uint32 pageCount)
1227 {
1228 	if (area->free_page_count == area->page_count && area->area >= 0) {
1229 		// this area was completely empty
1230 		heap->empty_areas--;
1231 	}
1232 
1233 	area->free_page_count -= pageCount;
1234 	heap->total_free_pages -= pageCount;
1235 
1236 	if (area->free_page_count == 0) {
1237 		// the area is now full so we remove it from the area list
1238 		if (area->prev)
1239 			area->prev->next = area->next;
1240 		if (area->next)
1241 			area->next->prev = area->prev;
1242 		if (heap->areas == area)
1243 			heap->areas = area->next;
1244 		area->next = area->prev = NULL;
1245 	} else {
1246 		// we might need to move forward in the area list
1247 		if (area->prev && area->prev->free_page_count > area->free_page_count) {
1248 			// move ourselves so the list stays ordered
1249 			heap_area *insert = area->prev;
1250 			while (insert->prev
1251 				&& insert->prev->free_page_count > area->free_page_count)
1252 				insert = insert->prev;
1253 
1254 			if (area->prev)
1255 				area->prev->next = area->next;
1256 			if (area->next)
1257 				area->next->prev = area->prev;
1258 
1259 			area->prev = insert->prev;
1260 			area->next = insert;
1261 			if (area->prev)
1262 				area->prev->next = area;
1263 			if (heap->areas == insert)
1264 				heap->areas = area;
1265 			insert->prev = area;
1266 		}
1267 	}
1268 }
1269 
1270 
1271 static inline void
1272 heap_link_page(heap_page *page, heap_page **list)
1273 {
1274 	page->prev = NULL;
1275 	page->next = *list;
1276 	if (page->next)
1277 		page->next->prev = page;
1278 	*list = page;
1279 }
1280 
1281 
1282 static inline void
1283 heap_unlink_page(heap_page *page, heap_page **list)
1284 {
1285 	if (page->prev)
1286 		page->prev->next = page->next;
1287 	if (page->next)
1288 		page->next->prev = page->prev;
1289 	if (list && *list == page) {
1290 		*list = page->next;
1291 		if (page->next)
1292 			page->next->prev = NULL;
1293 	}
1294 }
1295 
1296 
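// Searches the heap's areas for a run of pageCount contiguous free pages.
// On success the pages are marked in use, unlinked from the area's free page
// list and tagged with a common allocation_id, and the first page of the run
// is returned; otherwise NULL is returned.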
1297 static heap_page *
1298 heap_allocate_contiguous_pages(heap_allocator *heap, uint32 pageCount)
1299 {
1300 	MutexLocker pageLocker(heap->page_lock);
1301 	heap_area *area = heap->areas;
1302 	while (area) {
1303 		bool found = false;
1304 		int32 first = -1;
1305 		for (uint32 i = 0; i < area->page_count; i++) {
1306 			if (area->page_table[i].in_use) {
1307 				first = -1;
1308 				continue;
1309 			}
1310 
1311 			if (first < 0)
1312 				first = i;
1313 
1314 			if (first >= 0) {
1315 				if ((i + 1 - first) == pageCount) {
1316 					found = true;
1317 					break;
1318 				}
1319 			}
1320 		}
1321 
1322 		if (!found) {
1323 			area = area->next;
1324 			continue;
1325 		}
1326 
1327 		for (uint32 i = first; i < first + pageCount; i++) {
1328 			heap_page *page = &area->page_table[i];
1329 			page->in_use = 1;
1330 			page->bin_index = heap->bin_count;
1331 
1332 			heap_unlink_page(page, &area->free_pages);
1333 
1334 			page->next = page->prev = NULL;
1335 			page->free_list = NULL;
1336 			page->allocation_id = (uint16)first;
1337 		}
1338 
1339 		heap_free_pages_removed(heap, area, pageCount);
1340 		return &area->page_table[first];
1341 	}
1342 
1343 	return NULL;
1344 }
1345 
1346 
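// Backend of the allocation path. Requests that fit into a bin are served
// from the first page on that bin's page list, pulling in a fresh page from
// the areas if necessary; larger requests are served with contiguous pages.
// With leak checking enabled the tracking info is stored at the end of the
// allocation. Returns NULL if no suitable pages are available.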
1347 static void *
1348 heap_raw_alloc(heap_allocator *heap, size_t size, uint32 binIndex)
1349 {
1350 	TRACE(("raw_alloc: heap %p; size %lu; bin %lu\n", heap, size, binIndex));
1351 	if (binIndex < heap->bin_count) {
1352 		heap_bin *bin = &heap->bins[binIndex];
1353 		MutexLocker binLocker(bin->lock);
1354 
1355 		heap_page *page = bin->page_list;
1356 		if (page == NULL) {
1357 			MutexLocker pageLocker(heap->page_lock);
1358 			heap_area *area = heap->areas;
1359 			if (area == NULL) {
1360 				TRACE(("heap %p: no free pages to allocate %lu bytes\n", heap,
1361 					size));
1362 				return NULL;
1363 			}
1364 
1365 			// by design there are only areas in the list that still have
1366 			// free pages available
1367 			page = area->free_pages;
1368 			area->free_pages = page->next;
1369 			if (page->next)
1370 				page->next->prev = NULL;
1371 
1372 			heap_free_pages_removed(heap, area, 1);
1373 
1374 			if (page->in_use)
1375 				panic("got an in use page from the free pages list\n");
1376 			page->in_use = 1;
1377 
1378 			pageLocker.Unlock();
1379 
1380 			page->bin_index = binIndex;
1381 			page->free_count = bin->max_free_count;
1382 			page->empty_index = 0;
1383 			page->free_list = NULL;
1384 			page->next = page->prev = NULL;
1385 			bin->page_list = page;
1386 		}
1387 
1388 		// we have a page where we have a free slot
1389 		void *address = NULL;
1390 		if (page->free_list) {
1391 			// there's a previously freed entry we can use
1392 			address = page->free_list;
1393 			page->free_list = (addr_t *)*page->free_list;
1394 		} else {
1395 			// the page hasn't been fully allocated so use the next empty_index
1396 			address = (void *)(page->area->base + page->index * heap->page_size
1397 				+ page->empty_index * bin->element_size);
1398 			page->empty_index++;
1399 		}
1400 
1401 		page->free_count--;
1402 		if (page->free_count == 0) {
1403 			// the page is now full so we remove it from the page_list
1404 			bin->page_list = page->next;
1405 			if (page->next)
1406 				page->next->prev = NULL;
1407 			page->next = page->prev = NULL;
1408 		}
1409 
1410 		binLocker.Unlock();
1411 
1412 #if KERNEL_HEAP_LEAK_CHECK
1413 		heap_leak_check_info *info = (heap_leak_check_info *)((addr_t)address
1414 			+ bin->element_size - sizeof(heap_leak_check_info));
1415 		info->size = size - sizeof(heap_leak_check_info);
1416 		info->thread = (gKernelStartup ? 0 : thread_get_current_thread_id());
1417 		info->team = (gKernelStartup ? 0 : team_get_current_team_id());
1418 		info->caller = heap->get_caller();
1419 #endif
1420 		return address;
1421 	}
1422 
1423 	uint32 pageCount = (size + heap->page_size - 1) / heap->page_size;
1424 	heap_page *firstPage = heap_allocate_contiguous_pages(heap, pageCount);
1425 	if (firstPage == NULL) {
1426 		TRACE(("heap %p: found no contiguous pages to allocate %ld bytes\n",
1427 			heap, size));
1428 		return NULL;
1429 	}
1430 
1431 #if KERNEL_HEAP_LEAK_CHECK
1432 	heap_leak_check_info *info = (heap_leak_check_info *)(firstPage->area->base
1433 		+ (firstPage->index + pageCount) * heap->page_size
1434 		- sizeof(heap_leak_check_info));
1435 	info->size = size - sizeof(heap_leak_check_info);
1436 	info->thread = (gKernelStartup ? 0 : thread_get_current_thread_id());
1437 	info->team = (gKernelStartup ? 0 : team_get_current_team_id());
1438 	info->caller = heap->get_caller();
1439 #endif
1440 	return (void *)(firstPage->area->base + firstPage->index * heap->page_size);
1441 }
1442 
1443 
1444 #if DEBUG
1445 static bool
1446 is_valid_alignment(size_t number)
1447 {
1448 	// this cryptic line accepts zero and all powers of two
1449 	return ((~number + 1) | ((number << 1) - 1)) == ~0UL;
1450 }
1451 #endif
1452 
1453 
1454 inline bool
1455 heap_should_grow(heap_allocator *heap)
1456 {
1457 	// suggest growing if there is less than 20% of a grow size available
1458 	return heap->total_free_pages * heap->page_size < HEAP_GROW_SIZE / 5;
1459 }
1460 
1461 
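// Allocates size bytes from the given heap. A non-zero alignment is handled
// by rounding the requested size up to that alignment before picking a bin
// (see the TODO below). Returns NULL if the request cannot be satisfied.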
1462 void *
1463 heap_memalign(heap_allocator *heap, size_t alignment, size_t size)
1464 {
1465 	TRACE(("memalign(alignment = %lu, size = %lu)\n", alignment, size));
1466 
1467 #if DEBUG
1468 	if (!is_valid_alignment(alignment))
1469 		panic("memalign() with an alignment which is not a power of 2\n");
1470 #endif
1471 
1472 #if KERNEL_HEAP_LEAK_CHECK
1473 	size += sizeof(heap_leak_check_info);
1474 #endif
1475 
1476 	if (alignment != 0) {
1477 		// TODO: The alignment is done by ensuring that the element size
1478 		// of the target bin is aligned with the requested alignment. This
1479 		// has the problem that it wastes space because a better (smaller) bin
1480 		// could possibly be selected. We should pick the best bin and check
1481 		// if there is an aligned block in the free list or if a new (page
1482 		// aligned) page has to be allocated anyway.
1483 		size = ROUNDUP(size, alignment);
1484 	}
1485 
1486 	uint32 binIndex;
1487 	for (binIndex = 0; binIndex < heap->bin_count; binIndex++) {
1488 		if (size <= heap->bins[binIndex].element_size)
1489 			break;
1490 	}
1491 
1492 	void *address = heap_raw_alloc(heap, size, binIndex);
1493 
1494 #if KERNEL_HEAP_LEAK_CHECK
1495 	size -= sizeof(heap_leak_check_info);
1496 #endif
1497 
1498 	TRACE(("memalign(): asked to allocate %lu bytes, returning pointer %p\n",
1499 		size, address));
1500 
1501 	T(Allocate((addr_t)address, size));
1502 	if (address == NULL)
1503 		return address;
1504 
1505 #if PARANOID_KERNEL_MALLOC
1506 	memset(address, 0xcc, size);
1507 #endif
1508 
1509 #if PARANOID_KERNEL_FREE
1510 	// make sure 0xdeadbeef is cleared if we do not overwrite the memory
1511 	// and the user does not clear it
1512 	if (((uint32 *)address)[1] == 0xdeadbeef)
1513 		((uint32 *)address)[1] = 0xcccccccc;
1514 #endif
1515 
1516 	return address;
1517 }
1518 
1519 
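// Returns an allocation to the given heap. The containing area is looked up
// via the base-ordered all_areas list. Bin allocations are put back on their
// page's free list (releasing the whole page once it is empty); large
// allocations return all of their pages to the area's free page list. If more
// than one area ends up completely empty, surplus empty areas are deleted.
// Returns B_ENTRY_NOT_FOUND if the address does not belong to this heap.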
1520 status_t
1521 heap_free(heap_allocator *heap, void *address)
1522 {
1523 	if (address == NULL)
1524 		return B_OK;
1525 
1526 	ReadLocker areaReadLocker(heap->area_lock);
1527 	heap_area *area = heap->all_areas;
1528 	while (area) {
1529 		// since the all_areas list is ordered by base with the biggest
1530 		// base at the top, we need only find the first area with a base
1531 		// smaller than our address to become our only candidate for freeing
1532 		if (area->base <= (addr_t)address) {
1533 			if ((addr_t)address >= area->base + area->size) {
1534 				// none of the other areas can contain the address as the list
1535 				// is ordered
1536 				return B_ENTRY_NOT_FOUND;
1537 			}
1538 
1539 			// this area contains the allocation, we're done searching
1540 			break;
1541 		}
1542 
1543 		area = area->all_next;
1544 	}
1545 
1546 	if (area == NULL) {
1547 		// this address does not belong to us
1548 		return B_ENTRY_NOT_FOUND;
1549 	}
1550 
1551 	TRACE(("free(): asked to free pointer %p\n", address));
1552 
1553 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1554 		/ heap->page_size];
1555 
1556 	TRACE(("free(): page %p: bin_index %d, free_count %d\n", page,
1557 		page->bin_index, page->free_count));
1558 
1559 	if (page->bin_index > heap->bin_count) {
1560 		panic("free(): page %p: invalid bin_index %d\n", page, page->bin_index);
1561 		return B_ERROR;
1562 	}
1563 
1564 	if (page->bin_index < heap->bin_count) {
1565 		// small allocation
1566 		heap_bin *bin = &heap->bins[page->bin_index];
1567 
1568 #if PARANOID_KERNEL_FREE
1569 		if (((uint32 *)address)[1] == 0xdeadbeef) {
1570 			// This block looks like it was freed already, walk the free list
1571 			// on this page to make sure this address doesn't exist.
1572 			MutexLocker binLocker(bin->lock);
1573 			for (addr_t *temp = page->free_list; temp != NULL;
1574 					temp = (addr_t *)*temp) {
1575 				if (temp == address) {
1576 					panic("free(): address %p already exists in page free "
1577 						"list\n", address);
1578 					return B_ERROR;
1579 				}
1580 			}
1581 		}
1582 
1583 		// the first 4 bytes are overwritten with the next free list pointer
1584 		// later
1585 		uint32 *dead = (uint32 *)address;
1586 		for (uint32 i = 1; i < bin->element_size / sizeof(uint32); i++)
1587 			dead[i] = 0xdeadbeef;
1588 #endif
1589 
1590 		MutexLocker binLocker(bin->lock);
1591 		if (((addr_t)address - area->base - page->index
1592 			* heap->page_size) % bin->element_size != 0) {
1593 			panic("free(): passed invalid pointer %p supposed to be in bin for "
1594 				"element size %ld\n", address, bin->element_size);
1595 			return B_ERROR;
1596 		}
1597 
1598 		// add the address to the page free list
1599 		*(addr_t *)address = (addr_t)page->free_list;
1600 		page->free_list = (addr_t *)address;
1601 		page->free_count++;
1602 
1603 		if (page->free_count == bin->max_free_count) {
1604 			// we are now empty, remove the page from the bin list
1605 			MutexLocker pageLocker(heap->page_lock);
1606 			heap_unlink_page(page, &bin->page_list);
1607 			page->in_use = 0;
1608 			heap_link_page(page, &area->free_pages);
1609 			heap_free_pages_added(heap, area, 1);
1610 		} else if (page->free_count == 1) {
1611 			// we need to add ourselves to the page list of the bin
1612 			heap_link_page(page, &bin->page_list);
1613 		} else {
1614 			// we might need to move back in the free pages list
1615 			if (page->next && page->next->free_count < page->free_count) {
1616 				// move ourselves so the list stays ordered
1617 				heap_page *insert = page->next;
1618 				while (insert->next
1619 					&& insert->next->free_count < page->free_count)
1620 					insert = insert->next;
1621 
1622 				heap_unlink_page(page, &bin->page_list);
1623 
1624 				page->prev = insert;
1625 				page->next = insert->next;
1626 				if (page->next)
1627 					page->next->prev = page;
1628 				insert->next = page;
1629 			}
1630 		}
1631 	} else {
1632 		// large allocation, just return the pages to the page free list
1633 		uint32 allocationID = page->allocation_id;
1634 		uint32 maxPages = area->page_count - page->index;
1635 		uint32 pageCount = 0;
1636 
1637 		MutexLocker pageLocker(heap->page_lock);
1638 		for (uint32 i = 0; i < maxPages; i++) {
1639 			// loop until we find the end of this allocation
1640 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1641 				|| page[i].allocation_id != allocationID)
1642 				break;
1643 
1644 			// this page still belongs to the same allocation
1645 			page[i].in_use = 0;
1646 			page[i].allocation_id = 0;
1647 
1648 			// return it to the free list
1649 			heap_link_page(&page[i], &area->free_pages);
1650 			pageCount++;
1651 		}
1652 
1653 		heap_free_pages_added(heap, area, pageCount);
1654 	}
1655 
1656 	T(Free((addr_t)address));
1657 	areaReadLocker.Unlock();
1658 
1659 	if (heap->empty_areas > 1) {
1660 		WriteLocker areaWriteLocker(heap->area_lock);
1661 		MutexLocker pageLocker(heap->page_lock);
1662 
1663 		area = heap->areas;
1664 		while (area != NULL && heap->empty_areas > 1) {
1665 			heap_area *next = area->next;
1666 			if (area->area >= 0
1667 				&& area->free_page_count == area->page_count
1668 				&& heap_remove_area(heap, area) == B_OK) {
1669 				delete_area(area->area);
1670 				heap->empty_areas--;
1671 			}
1672 
1673 			area = next;
1674 		}
1675 	}
1676 
1677 	return B_OK;
1678 }
1679 
1680 
1681 #if KERNEL_HEAP_LEAK_CHECK
1682 extern "C" void
1683 heap_set_get_caller(heap_allocator* heap, addr_t (*getCaller)())
1684 {
1685 	heap->get_caller = getCaller;
1686 }
1687 #endif
1688 
1689 
1690 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
1691 
1692 
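// Resizes an allocation of this heap. If the new size still falls into the
// size range served by the existing bin element or page run, the allocation
// is reused in place; otherwise a new block is allocated with memalign(), the
// data is copied over and the old block is freed. Returns B_ENTRY_NOT_FOUND
// if the address does not belong to this heap; *newAddress is set to NULL if
// a required new allocation fails.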
1693 static status_t
1694 heap_realloc(heap_allocator *heap, void *address, void **newAddress,
1695 	size_t newSize)
1696 {
1697 	ReadLocker areaReadLocker(heap->area_lock);
1698 	heap_area *area = heap->all_areas;
1699 	while (area) {
1700 		// since the all_areas list is ordered by base with the biggest
1701 		// base at the top, we need only find the first area with a base
1702 		// smaller than our address to become our only candidate for
1703 		// reallocating
1704 		if (area->base <= (addr_t)address) {
1705 			if ((addr_t)address >= area->base + area->size) {
1706 				// none of the other areas can contain the address as the list
1707 				// is ordered
1708 				return B_ENTRY_NOT_FOUND;
1709 			}
1710 
1711 			// this area contains the allocation, we're done searching
1712 			break;
1713 		}
1714 
1715 		area = area->all_next;
1716 	}
1717 
1718 	if (area == NULL) {
1719 		// this address does not belong to us
1720 		return B_ENTRY_NOT_FOUND;
1721 	}
1722 
1723 	TRACE(("realloc(address = %p, newSize = %lu)\n", address, newSize));
1724 
1725 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1726 		/ heap->page_size];
1727 	if (page->bin_index > heap->bin_count) {
1728 		panic("realloc(): page %p: invalid bin_index %d\n", page,
1729 			page->bin_index);
1730 		return B_ERROR;
1731 	}
1732 
1733 	// find out the size of the old allocation first
1734 	size_t minSize = 0;
1735 	size_t maxSize = 0;
1736 	if (page->bin_index < heap->bin_count) {
1737 		// this was a small allocation
1738 		heap_bin *bin = &heap->bins[page->bin_index];
1739 		maxSize = bin->element_size;
1740 		if (page->bin_index > 0)
1741 			minSize = heap->bins[page->bin_index - 1].element_size + 1;
1742 	} else {
1743 		// this was a large allocation
1744 		uint32 allocationID = page->allocation_id;
1745 		uint32 maxPages = area->page_count - page->index;
1746 		maxSize = heap->page_size;
1747 
1748 		MutexLocker pageLocker(heap->page_lock);
1749 		for (uint32 i = 1; i < maxPages; i++) {
1750 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1751 				|| page[i].allocation_id != allocationID)
1752 				break;
1753 
1754 			minSize += heap->page_size;
1755 			maxSize += heap->page_size;
1756 		}
1757 	}
1758 
1759 	areaReadLocker.Unlock();
1760 
1761 #if KERNEL_HEAP_LEAK_CHECK
1762 	newSize += sizeof(heap_leak_check_info);
1763 #endif
1764 
1765 	// does the new allocation simply fit in the old allocation?
1766 	if (newSize > minSize && newSize <= maxSize) {
1767 #if KERNEL_HEAP_LEAK_CHECK
1768 		// update the size info (the info is at the end so stays where it is)
1769 		heap_leak_check_info *info = (heap_leak_check_info *)((addr_t)address
1770 			+ maxSize - sizeof(heap_leak_check_info));
1771 		info->size = newSize - sizeof(heap_leak_check_info);
1772 		newSize -= sizeof(heap_leak_check_info);
1773 #endif
1774 
1775 		T(Reallocate((addr_t)address, (addr_t)address, newSize));
1776 		*newAddress = address;
1777 		return B_OK;
1778 	}
1779 
1780 #if KERNEL_HEAP_LEAK_CHECK
1781 	// new leak check info will be created with the malloc below
1782 	newSize -= sizeof(heap_leak_check_info);
1783 #endif
1784 
1785 	// if not, allocate a new chunk of memory
1786 	*newAddress = memalign(0, newSize);
1787 	T(Reallocate((addr_t)address, (addr_t)*newAddress, newSize));
1788 	if (*newAddress == NULL) {
1789 		// the new allocation failed, but the operation itself is still done
1790 		return B_OK;
1791 	}
1792 
1793 	// copy the old data and free the old allocation
1794 	memcpy(*newAddress, address, min_c(maxSize, newSize));
1795 	heap_free(heap, address);
1796 	return B_OK;
1797 }
1798 
1799 
1800 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
1801 
1802 
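// Maps an allocation size (and the CPU doing the allocation) to the index of
// the kernel heap that should service the request, based on the per-class
// maximum allocation sizes.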
1803 inline uint32
1804 heap_index_for(size_t size, int32 cpu)
1805 {
1806 #if KERNEL_HEAP_LEAK_CHECK
1807 	// take the extra info size into account
1808 	size += sizeof(heap_leak_check_info_s);
1809 #endif
1810 
1811 	uint32 index = 0;
1812 	for (; index < HEAP_CLASS_COUNT - 1; index++) {
1813 		if (size <= sHeapClasses[index].max_allocation_size)
1814 			break;
1815 	}
1816 
1817 	return (index + cpu * HEAP_CLASS_COUNT) % sHeapCount;
1818 }
1819 
1820 
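// Drains the deferred free and deferred deletable lists: the queued entries
// are moved to local lists while the spinlock is held and are then freed or
// deleted after the lock has been released.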
1821 static void
1822 deferred_deleter(void *arg, int iteration)
1823 {
1824 	// move entries and deletables to on-stack lists
1825 	InterruptsSpinLocker locker(sDeferredFreeListLock);
1826 	if (sDeferredFreeList.IsEmpty() && sDeferredDeletableList.IsEmpty())
1827 		return;
1828 
1829 	DeferredFreeList entries;
1830 	entries.MoveFrom(&sDeferredFreeList);
1831 
1832 	DeferredDeletableList deletables;
1833 	deletables.MoveFrom(&sDeferredDeletableList);
1834 
1835 	locker.Unlock();
1836 
1837 	// free the entries
1838 	while (DeferredFreeListEntry* entry = entries.RemoveHead())
1839 		free(entry);
1840 
1841 	// delete the deletables
1842 	while (DeferredDeletable* deletable = deletables.RemoveHead())
1843 		delete deletable;
1844 }
1845 
1846 
1847 //	#pragma mark -
1848 
1849 
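// Creates a new kernel area of the given size and adds it to the heap,
// validating the heap afterwards if paranoid heap validation is enabled.
// Returns the create_area() error on failure.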
1850 static status_t
1851 heap_create_new_heap_area(heap_allocator *heap, const char *name, size_t size)
1852 {
1853 	void *address = NULL;
1854 	area_id heapArea = create_area(name, &address,
1855 		B_ANY_KERNEL_BLOCK_ADDRESS, size, B_FULL_LOCK,
1856 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1857 	if (heapArea < B_OK) {
1858 		TRACE(("heap: couldn't allocate heap area \"%s\"\n", name));
1859 		return heapArea;
1860 	}
1861 
1862 	heap_add_area(heap, heapArea, (addr_t)address, size);
1863 #if PARANOID_HEAP_VALIDATION
1864 	heap_validate_heap(heap);
1865 #endif
1866 	return B_OK;
1867 }
1868 
1869 
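// Body of the dedicated grower thread: waits on sHeapGrowSem and, when woken,
// enlarges the dedicated grow heap if that was requested and adds a new area
// to every heap that explicitly asked for growth or is running low on free
// pages, then wakes up any threads waiting on sHeapGrownNotify.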
1870 static int32
1871 heap_grow_thread(void *)
1872 {
1873 	while (true) {
1874 		// wait for a request to grow the heap list
1875 		if (acquire_sem(sHeapGrowSem) < B_OK)
1876 			continue;
1877 
1878 		if (sAddGrowHeap) {
1879 			// the grow heap is going to run full soon, try to allocate a new
1880 			// one to make some room.
1881 			TRACE(("heap_grower: grow heaps will run out of memory soon\n"));
1882 			if (heap_create_new_heap_area(sGrowHeap, "additional grow heap",
1883 					HEAP_DEDICATED_GROW_SIZE) != B_OK)
1884 				dprintf("heap_grower: failed to create new grow heap area\n");
1885 		}
1886 
1887 		for (uint32 i = 0; i < sHeapCount; i++) {
1888 			heap_allocator *heap = sHeaps[i];
1889 			if (sLastGrowRequest[i] > sLastHandledGrowRequest[i]
1890 				|| heap_should_grow(heap)) {
1891 				// grow this heap if it is nearly full or if a grow was
1892 				// explicitly requested for this heap (happens when a large
1893 				// allocation cannot be fulfilled due to lack of contiguous
1894 				// pages)
1895 				if (heap_create_new_heap_area(heap, "additional heap",
1896 						HEAP_GROW_SIZE) != B_OK)
1897 					dprintf("heap_grower: failed to create new heap area\n");
1898 				sLastHandledGrowRequest[i] = sLastGrowRequest[i];
1899 			}
1900 		}
1901 
1902 		// notify anyone waiting for this request
1903 		release_sem_etc(sHeapGrownNotify, -1, B_RELEASE_ALL);
1904 	}
1905 
1906 	return 0;
1907 }
1908 
1909 
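// Splits the initial heap range among the heap classes according to their
// initial_percentage and registers the heap-related KDL commands. For
// illustration only, assuming percentages of 50/25/25: a 16 MB range would
// be split into 8 MB, 4 MB and 4 MB allocators.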
1910 status_t
1911 heap_init(addr_t base, size_t size)
1912 {
1913 	for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) {
1914 		size_t partSize = size * sHeapClasses[i].initial_percentage / 100;
1915 		sHeaps[i] = heap_create_allocator(sHeapClasses[i].name, base, partSize,
1916 			&sHeapClasses[i], false);
1917 		sLastGrowRequest[i] = sLastHandledGrowRequest[i] = 0;
1918 		base += partSize;
1919 		sHeapCount++;
1920 	}
1921 
1922 	// set up some debug commands
1923 	add_debugger_command_etc("heap", &dump_heap_list,
1924 		"Dump info about the kernel heap(s)",
1925 		"[(\"grow\" | \"stats\" | <heap>)]\n"
1926 		"Dump info about the kernel heap(s). If \"grow\" is specified, only\n"
1927 		"info about the dedicated grow heap is printed. If \"stats\" is\n"
1928 		"given as the argument, currently only the heap count is printed.\n"
1929 		"If <heap> is given, it is interpreted as the address of the heap to\n"
1930 		"print info about.\n", 0);
1931 #if !KERNEL_HEAP_LEAK_CHECK
1932 	add_debugger_command_etc("allocations", &dump_allocations,
1933 		"Dump current heap allocations",
1934 		"[\"stats\"] [<heap>]\n"
1935 		"If no parameters are given, all current allocations are dumped.\n"
1936 		"If the optional argument \"stats\" is specified, only the allocation\n"
1937 		"counts and no individual allocations are printed.\n"
1938 		"If a specific heap address is given, only allocations of this\n"
1939 		"allocator are dumped.\n", 0);
1940 #else // !KERNEL_HEAP_LEAK_CHECK
1941 	add_debugger_command_etc("allocations", &dump_allocations,
1942 		"Dump current heap allocations",
1943 		"[(\"team\" | \"thread\") <id>] [\"caller\" <address>] [\"address\" <address>] [\"stats\"]\n"
1944 		"If no parameters are given, all current allocations are dumped.\n"
1945 		"If \"team\", \"thread\", \"caller\", and/or \"address\" is specified as the first\n"
1946 		"argument, only allocations matching the team ID, thread ID, caller\n"
1947 		"address or allocated address given in the second argument are printed.\n"
1948 		"If the optional argument \"stats\" is specified, only the allocation\n"
1949 		"counts and no individual allocations are printed.\n", 0);
1950 	add_debugger_command_etc("allocations_per_caller",
1951 		&dump_allocations_per_caller,
1952 		"Dump current heap allocations summed up per caller",
1953 		"[ \"-c\" ] [ -h <heap> ]\n"
1954 		"The current allocations will be summed up per caller (their count\n"
1955 		"and size), printed in decreasing order by size or, if \"-c\" is\n"
1956 		"specified, by allocation count. If given, <heap> specifies the\n"
1957 		"address of the heap for which to print the allocations.\n", 0);
1958 #endif // KERNEL_HEAP_LEAK_CHECK
1959 	return B_OK;
1960 }
1961 
1962 
1963 status_t
1964 heap_init_post_sem()
1965 {
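	// Reserve the address ranges around the 0xcccccccc and 0xdeadbeef fill
	// patterns so that dereferencing uninitialized or already freed heap
	// memory faults instead of silently hitting mapped memory.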
1966 #if PARANOID_KERNEL_MALLOC
1967 	vm_block_address_range("uninitialized heap memory",
1968 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
1969 #endif
1970 #if PARANOID_KERNEL_FREE
1971 	vm_block_address_range("freed heap memory",
1972 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
1973 #endif
1974 
1975 	sHeapGrowSem = create_sem(0, "heap_grow_sem");
1976 	if (sHeapGrowSem < 0) {
1977 		panic("heap_init_post_sem(): failed to create heap grow sem\n");
1978 		return B_ERROR;
1979 	}
1980 
1981 	sHeapGrownNotify = create_sem(0, "heap_grown_notify");
1982 	if (sHeapGrownNotify < 0) {
1983 		panic("heap_init_post_sem(): failed to create heap grown notify sem\n");
1984 		return B_ERROR;
1985 	}
1986 
1987 	return B_OK;
1988 }
1989 
1990 
1991 status_t
1992 heap_init_post_thread()
1993 {
1994 	void *address = NULL;
1995 	area_id growHeapArea = create_area("dedicated grow heap", &address,
1996 		B_ANY_KERNEL_BLOCK_ADDRESS, HEAP_DEDICATED_GROW_SIZE, B_FULL_LOCK,
1997 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1998 	if (growHeapArea < 0) {
1999 		panic("heap_init_post_thread(): couldn't allocate dedicated grow heap "
2000 			"area");
2001 		return growHeapArea;
2002 	}
2003 
2004 	sGrowHeap = heap_create_allocator("grow", (addr_t)address,
2005 		HEAP_DEDICATED_GROW_SIZE, &sHeapClasses[0], false);
2006 	if (sGrowHeap == NULL) {
2007 		panic("heap_init_post_thread(): failed to create dedicated grow heap\n");
2008 		return B_ERROR;
2009 	}
2010 
2011 	sHeapGrowThread = spawn_kernel_thread(heap_grow_thread, "heap grower",
2012 		B_URGENT_PRIORITY, NULL);
2013 	if (sHeapGrowThread < 0) {
2014 		panic("heap_init_post_thread(): cannot create heap grow thread\n");
2015 		return sHeapGrowThread;
2016 	}
2017 
2018 	// create per-cpu heaps if there's enough memory
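	// Rough sizing heuristic: allow one additional CPU's worth of heaps per
	// 60 * 1024 pages of RAM (about 240 MB with 4 KiB pages), but never more
	// than one set per CPU.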
2019 	int32 heapCount = MIN(smp_get_num_cpus(),
2020 		(int32)vm_page_num_pages() / 60 / 1024);
2021 	for (int32 i = 1; i < heapCount; i++) {
2022 		addr_t base = 0;
2023 		size_t size = HEAP_GROW_SIZE * HEAP_CLASS_COUNT;
2024 		area_id perCPUHeapArea = create_area("per cpu initial heap",
2025 			(void **)&base, B_ANY_KERNEL_ADDRESS, size, B_FULL_LOCK,
2026 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2027 		if (perCPUHeapArea < 0)
2028 			break;
2029 
2030 		for (uint32 j = 0; j < HEAP_CLASS_COUNT; j++) {
2031 			int32 heapIndex = i * HEAP_CLASS_COUNT + j;
2032 			size_t partSize = size * sHeapClasses[j].initial_percentage / 100;
2033 			sHeaps[heapIndex] = heap_create_allocator(sHeapClasses[j].name,
2034 				base, partSize, &sHeapClasses[j], false);
2035 			sLastGrowRequest[heapIndex] = 0;
2036 			sLastHandledGrowRequest[heapIndex] = 0;
2037 			base += partSize;
2038 			sHeapCount++;
2039 		}
2040 	}
2041 
2042 	// run the deferred deleter roughly once a second
2043 	if (register_kernel_daemon(deferred_deleter, NULL, 10) != B_OK)
2044 		panic("heap_init_post_thread(): failed to init deferred deleter");
2045 
2046 	send_signal_etc(sHeapGrowThread, SIGCONT, B_DO_NOT_RESCHEDULE);
2047 	return B_OK;
2048 }
2049 
2050 
2051 //	#pragma mark - Public API
2052 
2053 
2054 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
2055 
2056 
2057 void *
2058 memalign(size_t alignment, size_t size)
2059 {
2060 	if (!gKernelStartup && !are_interrupts_enabled()) {
2061 		panic("memalign(): called with interrupts disabled\n");
2062 		return NULL;
2063 	}
2064 
2065 	if (!gKernelStartup && size > HEAP_AREA_USE_THRESHOLD) {
2066 		// don't even attempt such a huge allocation - use areas instead
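		// Layout of such an area: an area_allocation_info header at the
		// start, optional padding up to the requested alignment, then the
		// user data. free() and realloc() later recognize these blocks via
		// area_for() and the kAreaAllocationMagic stored in the header.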
2067 		size_t areaSize = ROUNDUP(size + sizeof(area_allocation_info)
2068 			+ alignment, B_PAGE_SIZE);
2069 		if (areaSize < size) {
2070 			// the size overflowed
2071 			return NULL;
2072 		}
2073 
2074 		void *address = NULL;
2075 		area_id allocationArea = create_area("memalign area", &address,
2076 			B_ANY_KERNEL_BLOCK_ADDRESS, areaSize, B_FULL_LOCK,
2077 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2078 		if (allocationArea < B_OK) {
2079 			dprintf("heap: failed to create area for huge allocation\n");
2080 			return NULL;
2081 		}
2082 
2083 		area_allocation_info *info = (area_allocation_info *)address;
2084 		info->magic = kAreaAllocationMagic;
2085 		info->area = allocationArea;
2086 		info->base = address;
2087 		info->size = areaSize;
2088 		info->allocation_size = size;
2089 		info->allocation_alignment = alignment;
2090 
2091 		address = (void *)((addr_t)address + sizeof(area_allocation_info));
2092 		if (alignment != 0) {
2093 			address = (void *)ROUNDUP((addr_t)address, alignment);
2094 			ASSERT((addr_t)address % alignment == 0);
2095 			ASSERT((addr_t)address + size - 1 < (addr_t)info + areaSize - 1);
2096 		}
2097 
2098 		TRACE(("heap: allocated area %ld for huge allocation of %lu bytes\n",
2099 			allocationArea, size));
2100 
2101 		info->allocation_base = address;
2102 
2103 #if PARANOID_KERNEL_MALLOC
2104 		memset(address, 0xcc, size);
2105 #endif
2106 		return address;
2107 	}
2108 
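	// Regular allocation: try the heap of the matching size class for the
	// current CPU first, then fall back to the other CPUs' heaps of that
	// class before involving the grower thread.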
2109 	void *result = NULL;
2110 	bool shouldGrow = false;
2111 	int32 cpuCount = MIN(smp_get_num_cpus(),
2112 		(int32)sHeapCount / HEAP_CLASS_COUNT);
2113 	int32 cpuNumber = smp_get_current_cpu();
2114 	for (int32 i = 0; i < cpuCount; i++) {
2115 		uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount);
2116 		heap_allocator *heap = sHeaps[heapIndex];
2117 		result = heap_memalign(heap, alignment, size);
2118 		if (result != NULL) {
2119 			shouldGrow = heap_should_grow(heap);
2120 			break;
2121 		}
2122 
2123 #if PARANOID_HEAP_VALIDATION
2124 		heap_validate_heap(heap);
2125 #endif
2126 	}
2127 
2128 	if (result == NULL) {
2129 		// request an urgent grow and wait - we don't grow ourselves here, so
2130 		// that growing is serialized through the grow thread; otherwise
2131 		// multiple threads hitting this situation (likely when memory ran
2132 		// out) would all add areas
2133 		uint32 heapIndex = heap_index_for(size, smp_get_current_cpu());
2134 		sLastGrowRequest[heapIndex]++;
2135 		switch_sem(sHeapGrowSem, sHeapGrownNotify);
2136 
2137 		// and then try again
2138 		result = heap_memalign(sHeaps[heapIndex], alignment, size);
2139 	} else if (shouldGrow) {
2140 		// should grow sometime soon, notify the grower
2141 		release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
2142 	}
2143 
2144 	if (result == NULL)
2145 		panic("heap: kernel heap has run out of memory\n");
2146 	return result;
2147 }
2148 
2149 
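// Like memalign(), but never waits for the grower thread: the grower itself
// is served from the dedicated grow heap, while all other callers only get
// what is already available in the regular heaps.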
2150 void *
2151 memalign_nogrow(size_t alignment, size_t size)
2152 {
2153 	// use dedicated memory in the grow thread by default
2154 	if (thread_get_current_thread_id() == sHeapGrowThread) {
2155 		void *result = heap_memalign(sGrowHeap, alignment, size);
2156 		if (!sAddGrowHeap && heap_should_grow(sGrowHeap)) {
2157 			// hopefully the heap grower will manage to create a new heap
2158 			// before running out of private memory...
2159 			dprintf("heap: requesting new grow heap\n");
2160 			sAddGrowHeap = true;
2161 			release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
2162 		}
2163 
2164 		if (result != NULL)
2165 			return result;
2166 	}
2167 
2168 	// try public memory, there might be something available
2169 	void *result = NULL;
2170 	int32 cpuCount = MIN(smp_get_num_cpus(),
2171 		(int32)sHeapCount / HEAP_CLASS_COUNT);
2172 	int32 cpuNumber = smp_get_current_cpu();
2173 	for (int32 i = 0; i < cpuCount; i++) {
2174 		uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount);
2175 		heap_allocator *heap = sHeaps[heapIndex];
2176 		result = heap_memalign(heap, alignment, size);
2177 		if (result != NULL)
2178 			return result;
2179 	}
2180 
2181 	// no memory available
2182 	if (thread_get_current_thread_id() == sHeapGrowThread)
2183 		panic("heap: all heaps have run out of memory while growing\n");
2184 	else
2185 		dprintf("heap: all heaps have run out of memory\n");
2186 
2187 	return NULL;
2188 }
2189 
2190 
2191 void *
2192 malloc_nogrow(size_t size)
2193 {
2194 	return memalign_nogrow(0, size);
2195 }
2196 
2197 
2198 void *
2199 malloc(size_t size)
2200 {
2201 	return memalign(0, size);
2202 }
2203 
2204 
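// Returns a block to whichever backend owns it: the regular heaps are tried
// first (starting with the current CPU's set), then the dedicated grow heap,
// and finally the address is checked for being a huge allocation living in
// its own area.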
2205 void
2206 free(void *address)
2207 {
2208 	if (!gKernelStartup && !are_interrupts_enabled()) {
2209 		panic("free(): called with interrupts disabled\n");
2210 		return;
2211 	}
2212 
2213 	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2214 	for (uint32 i = 0; i < sHeapCount; i++) {
2215 		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2216 		if (heap_free(heap, address) == B_OK) {
2217 #if PARANOID_HEAP_VALIDATION
2218 			heap_validate_heap(heap);
2219 #endif
2220 			return;
2221 		}
2222 	}
2223 
2224 	// maybe it was allocated from the dedicated grow heap
2225 	if (heap_free(sGrowHeap, address) == B_OK)
2226 		return;
2227 
2228 	// or maybe it was a huge allocation using an area
2229 	area_info areaInfo;
2230 	area_id area = area_for(address);
2231 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2232 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2233 
2234 		// just make extra sure it was allocated by us
2235 		if (info->magic == kAreaAllocationMagic && info->area == area
2236 			&& info->size == areaInfo.size && info->base == areaInfo.address
2237 			&& info->allocation_size < areaInfo.size) {
2238 			delete_area(area);
2239 			TRACE(("free(): freed huge allocation by deleting area %ld\n",
2240 				area));
2241 			return;
2242 		}
2243 	}
2244 
2245 	panic("free(): free failed for address %p\n", address);
2246 }
2247 
2248 
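// Resizes an allocation, trying the regular heaps, the grow heap and finally
// huge area-backed allocations. Area-backed blocks are resized in place when
// the area is still large enough; otherwise a new block is allocated, the
// data copied, and the old area deleted.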
2249 void *
2250 realloc(void *address, size_t newSize)
2251 {
2252 	if (!gKernelStartup && !are_interrupts_enabled()) {
2253 		panic("realloc(): called with interrupts disabled\n");
2254 		return NULL;
2255 	}
2256 
2257 	if (address == NULL)
2258 		return memalign(0, newSize);
2259 
2260 	if (newSize == 0) {
2261 		free(address);
2262 		return NULL;
2263 	}
2264 
2265 	void *newAddress = NULL;
2266 	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2267 	for (uint32 i = 0; i < sHeapCount; i++) {
2268 		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2269 		if (heap_realloc(heap, address, &newAddress, newSize) == B_OK) {
2270 #if PARANOID_HEAP_VALIDATION
2271 			heap_validate_heap(heap);
2272 #endif
2273 			return newAddress;
2274 		}
2275 	}
2276 
2277 	// maybe it was allocated from the dedicated grow heap
2278 	if (heap_realloc(sGrowHeap, address, &newAddress, newSize) == B_OK)
2279 		return newAddress;
2280 
2281 	// or maybe it was a huge allocation using an area
2282 	area_info areaInfo;
2283 	area_id area = area_for(address);
2284 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2285 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2286 
2287 		// just make extra sure it was allocated by us
2288 		if (info->magic == kAreaAllocationMagic && info->area == area
2289 			&& info->size == areaInfo.size && info->base == areaInfo.address
2290 			&& info->allocation_size < areaInfo.size) {
2291 			size_t available = info->size - ((addr_t)info->allocation_base
2292 				- (addr_t)info->base);
2293 
2294 			if (available >= newSize) {
2295 				// there is enough room available for the newSize
2296 				TRACE(("realloc(): new size %ld fits in old area %ld with %ld "
2297 					"available\n", newSize, area, available));
2298 				info->allocation_size = newSize;
2299 				return address;
2300 			}
2301 
2302 			// have to allocate/copy/free - TODO maybe resize the area instead?
2303 			newAddress = memalign(0, newSize);
2304 			if (newAddress == NULL) {
2305 				dprintf("realloc(): failed to allocate new block of %ld bytes\n",
2306 					newSize);
2307 				return NULL;
2308 			}
2309 
2310 			memcpy(newAddress, address, min_c(newSize, info->allocation_size));
2311 			delete_area(area);
2312 			TRACE(("realloc(): allocated new block %p for size %ld and deleted "
2313 				"old area %ld\n", newAddress, newSize, area));
2314 			return newAddress;
2315 		}
2316 	}
2317 
2318 	panic("realloc(): failed to realloc address %p to size %lu\n", address,
2319 		newSize);
2320 	return NULL;
2321 }
2322 
2323 
2324 #endif	// !USE_SLAB_ALLOCATOR_FOR_MALLOC
2325 
2326 
2327 void *
2328 calloc(size_t numElements, size_t size)
2329 {
2330 	// make sure numElements * size does not overflow
	if (size != 0 && numElements > ((size_t)-1) / size)
		return NULL;

	void *address = memalign(0, numElements * size);
2331 	if (address != NULL)
2332 		memset(address, 0, numElements * size);
2333 
2334 	return address;
2335 }
2336 
2337 
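// Queues a block for freeing by the kernel daemon instead of freeing it
// immediately; useful from contexts that must not call free() directly.
// A DeferredFreeListEntry is constructed in-place inside the block, so the
// block must be large enough to hold one. Hypothetical usage sketch:
//
//		// in code holding a spinlock or running with interrupts disabled
//		deferred_free(oldBuffer);
//		// oldBuffer is then released on the daemon's next pass (roughly
//		// once a second, see heap_init_post_thread())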
2338 void
2339 deferred_free(void *block)
2340 {
2341 	if (block == NULL)
2342 		return;
2343 
2344 	DeferredFreeListEntry *entry = new(block) DeferredFreeListEntry;
2345 
2346 	InterruptsSpinLocker _(sDeferredFreeListLock);
2347 	sDeferredFreeList.Add(entry);
2348 }
2349 
2350 
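// Reference-counted allocation helpers: malloc_referenced() reserves an extra
// int32 immediately in front of the returned pointer (hence the size + 4 and
// the referencedData + 1 adjustments) and initializes it to 1; acquire() and
// release() adjust that hidden counter, and release() frees the whole block,
// counter included, when the count indicates there are no more users.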
2351 void *
2352 malloc_referenced(size_t size)
2353 {
2354 	int32 *referencedData = (int32 *)malloc(size + 4);
2355 	if (referencedData == NULL)
2356 		return NULL;
2357 
2358 	*referencedData = 1;
2359 	return referencedData + 1;
2360 }
2361 
2362 
2363 void *
2364 malloc_referenced_acquire(void *data)
2365 {
2366 	if (data != NULL) {
2367 		int32 *referencedData = (int32 *)data - 1;
2368 		atomic_add(referencedData, 1);
2369 	}
2370 
2371 	return data;
2372 }
2373 
2374 
2375 void
2376 malloc_referenced_release(void *data)
2377 {
2378 	if (data == NULL)
2379 		return;
2380 
2381 	int32 *referencedData = (int32 *)data - 1;
2382 	if (atomic_add(referencedData, -1) < 1)
2383 		free(referencedData);
2384 }
2385 
2386 
2387 DeferredDeletable::~DeferredDeletable()
2388 {
2389 }
2390 
2391 
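// Counterpart to deferred_free() for C++ objects: anything derived from
// DeferredDeletable can be queued here and is deleted on the kernel daemon's
// next pass instead of immediately.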
2392 void
2393 deferred_delete(DeferredDeletable *deletable)
2394 {
2395 	if (deletable == NULL)
2396 		return;
2397 
2398 	InterruptsSpinLocker _(sDeferredFreeListLock);
2399 	sDeferredDeletableList.Add(deletable);
2400 }
2401