xref: /haiku/src/system/kernel/heap.cpp (revision 959ff00ddee8411dabb09211f3bfbd52d87229da)
1 /*
2  * Copyright 2008, Michael Lotz, mmlr@mlotz.ch.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2002-2006, Axel Dörfler, axeld@pinc-software.de.
6  * Distributed under the terms of the MIT License.
7  *
8  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
9  * Distributed under the terms of the NewOS License.
10  */
11 #include <arch/debug.h>
12 #include <debug.h>
13 #include <elf.h>
14 #include <heap.h>
15 #include <int.h>
16 #include <kernel.h>
17 #include <lock.h>
18 #include <malloc.h>
19 #include <signal.h>
20 #include <string.h>
21 #include <team.h>
22 #include <thread.h>
23 #include <tracing.h>
24 #include <util/AutoLock.h>
25 #include <util/DoublyLinkedList.h>
26 #include <vm.h>
27 
28 //#define TRACE_HEAP
29 #ifdef TRACE_HEAP
30 #	define TRACE(x) dprintf x
31 #else
32 #	define TRACE(x) ;
33 #endif
34 
35 // initialize newly allocated memory with something non-zero
36 #define PARANOID_KERNEL_MALLOC 1
37 // check for double frees and fill freed memory with 0xdeadbeef
38 #define PARANOID_KERNEL_FREE 1
39 // validate sanity of the heap after each operation (slow!)
40 #define PARANOID_HEAP_VALIDATION 0
41 
42 #if KERNEL_HEAP_LEAK_CHECK
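// When leak checking is enabled, one of these records is stored at the very
// end of every allocation (heap_raw_alloc() reserves the space for it), so
// the owner of a block can later be identified by team, thread and caller.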
43 typedef struct heap_leak_check_info_s {
44 	addr_t		caller;
45 	size_t		size;
46 	thread_id	thread;
47 	team_id		team;
48 } heap_leak_check_info;
49 
50 struct caller_info {
51 	addr_t		caller;
52 	uint32		count;
53 	uint32		size;
54 };
55 
56 static const int32 kCallerInfoTableSize = 1024;
57 static caller_info sCallerInfoTable[kCallerInfoTableSize];
58 static int32 sCallerInfoCount = 0;
59 #endif
60 
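// The allocator is organized in three levels: a heap_allocator manages a
// list of heap_areas, each area is divided into fixed size pages described
// by a heap_page table, and small allocations are served from per-size bins
// whose elements are carved out of a single page. Allocations larger than
// the biggest bin element take a run of contiguous pages instead.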
61 typedef struct heap_page_s heap_page;
62 
63 typedef struct heap_area_s {
64 	area_id			area;
65 
66 	addr_t			base;
67 	size_t			size;
68 
69 	uint32			page_count;
70 	uint32			free_page_count;
71 
72 	heap_page *		free_pages;
73 	heap_page *		page_table;
74 
75 	heap_area_s *	prev;
76 	heap_area_s *	next;
77 	heap_area_s *	all_next;
78 } heap_area;
79 
80 typedef struct heap_page_s {
81 	heap_area *		area;
82 	uint16			index;
83 	uint16			bin_index : 5;
84 	uint16			free_count : 10;
85 	uint16			in_use : 1;
86 	heap_page_s *	next;
87 	heap_page_s *	prev;
88 	union {
89 		uint16			empty_index;
90 		uint16			allocation_id; // used for bin == bin_count allocations
91 	};
92 	addr_t *		free_list;
93 } heap_page;
94 
95 typedef struct heap_bin_s {
96 	uint32		element_size;
97 	uint16		max_free_count;
98 	heap_page *	page_list; // sorted so that the desired page is always first
99 } heap_bin;
100 
101 struct heap_allocator_s {
102 	mutex		lock;
103 
104 	const char *name;
105 	uint32		bin_count;
106 	uint32		page_size;
107 
108 	uint32		total_pages;
109 	uint32		total_free_pages;
110 	uint32		empty_areas;
111 
112 #if KERNEL_HEAP_LEAK_CHECK
113 	addr_t		(*get_caller)();
114 #endif
115 
116 	heap_bin *	bins;
117 	heap_area *	areas; // sorted so that the desired area is always first
118 	heap_area *	all_areas; // all areas including full ones
119 };
120 
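// Allocations above HEAP_AREA_USE_THRESHOLD bypass the heap entirely:
// memalign() creates a dedicated area and places this header at its start,
// so that free() can recognize such a block by the magic value and release
// the backing area again.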
121 static const uint32 kAreaAllocationMagic = 'AAMG';
122 typedef struct area_allocation_info_s {
123 	area_id		area;
124 	void *		base;
125 	uint32		magic;
126 	size_t		size;
127 	size_t		allocation_size;
128 	size_t		allocation_alignment;
129 	void *		allocation_base;
130 } area_allocation_info;
131 
132 struct DeferredFreeListEntry : DoublyLinkedListLinkImpl<DeferredFreeListEntry> {
133 };
134 typedef DoublyLinkedList<DeferredFreeListEntry> DeferredFreeList;
135 
136 // Heap class configuration
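// The three classes split the initial heap range according to their
// initial_percentage (see heap_init()), and heap_class_for() routes each
// allocation to the smallest class whose max_allocation_size can hold it.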
137 #define HEAP_CLASS_COUNT 3
138 static heap_class sHeapClasses[HEAP_CLASS_COUNT] = {
139 	{
140 		"small",					/* name */
141 		50,							/* initial percentage */
142 		B_PAGE_SIZE / 8,			/* max allocation size */
143 		B_PAGE_SIZE,				/* page size */
144 		8,							/* min bin size */
145 		4,							/* bin alignment */
146 		8,							/* min count per page */
147 		16							/* max waste per page */
148 	},
149 	{
150 		"medium",					/* name */
151 		30,							/* initial percentage */
152 		B_PAGE_SIZE * 2,			/* max allocation size */
153 		B_PAGE_SIZE * 8,			/* page size */
154 		B_PAGE_SIZE / 8,			/* min bin size */
155 		32,							/* bin alignment */
156 		4,							/* min count per page */
157 		64							/* max waste per page */
158 	},
159 	{
160 		"large",					/* name */
161 		20,							/* initial percentage */
162 		HEAP_AREA_USE_THRESHOLD,	/* max allocation size */
163 		B_PAGE_SIZE * 16,			/* page size */
164 		B_PAGE_SIZE * 2,			/* min bin size */
165 		128,						/* bin alignment */
166 		1,							/* min count per page */
167 		256							/* max waste per page */
168 	}
169 };
170 
171 static heap_allocator *sHeaps[HEAP_CLASS_COUNT];
172 static uint32 *sLastGrowRequest[HEAP_CLASS_COUNT];
173 static uint32 *sLastHandledGrowRequest[HEAP_CLASS_COUNT];
174 static heap_allocator *sGrowHeap = NULL;
175 static thread_id sHeapGrowThread = -1;
176 static sem_id sHeapGrowSem = -1;
177 static sem_id sHeapGrownNotify = -1;
178 static bool sAddGrowHeap = false;
179 
180 static DeferredFreeList sDeferredFreeList;
181 static spinlock sDeferredFreeListLock;
182 
183 
184 
185 // #pragma mark - Tracing
186 
187 #if KERNEL_HEAP_TRACING
188 namespace KernelHeapTracing {
189 
190 class Allocate : public AbstractTraceEntry {
191 	public:
192 		Allocate(addr_t address, size_t size)
193 			:	fAddress(address),
194 				fSize(size)
195 		{
196 			Initialized();
197 		}
198 
199 		virtual void AddDump(TraceOutput &out)
200 		{
201 			out.Print("heap allocate: 0x%08lx (%lu bytes)", fAddress, fSize);
202 		}
203 
204 	private:
205 		addr_t	fAddress;
206 		size_t	fSize;
207 };
208 
209 
210 class Reallocate : public AbstractTraceEntry {
211 	public:
212 		Reallocate(addr_t oldAddress, addr_t newAddress, size_t newSize)
213 			:	fOldAddress(oldAddress),
214 				fNewAddress(newAddress),
215 				fNewSize(newSize)
216 		{
217 			Initialized();
218 		};
219 
220 		virtual void AddDump(TraceOutput &out)
221 		{
222 			out.Print("heap reallocate: 0x%08lx -> 0x%08lx (%lu bytes)",
223 				fOldAddress, fNewAddress, fNewSize);
224 		}
225 
226 	private:
227 		addr_t	fOldAddress;
228 		addr_t	fNewAddress;
229 		size_t	fNewSize;
230 };
231 
232 
233 class Free : public AbstractTraceEntry {
234 	public:
235 		Free(addr_t address)
236 			:	fAddress(address)
237 		{
238 			Initialized();
239 		};
240 
241 		virtual void AddDump(TraceOutput &out)
242 		{
243 			out.Print("heap free: 0x%08lx", fAddress);
244 		}
245 
246 	private:
247 		addr_t	fAddress;
248 };
249 
250 
251 } // namespace KernelHeapTracing
252 
253 #	define T(x)	if (!gKernelStartup) new(std::nothrow) KernelHeapTracing::x;
254 #else
255 #	define T(x)	;
256 #endif
257 
258 
259 // #pragma mark - Debug functions
260 
261 
262 #if KERNEL_HEAP_LEAK_CHECK
263 static addr_t
264 get_caller()
265 {
266 	// Find the first return address outside of the allocator code. Note that
267 	// this makes certain assumptions about how the code for the functions
268 	// ends up in the kernel object.
269 	addr_t returnAddresses[5];
270 	int32 depth = arch_debug_get_stack_trace(returnAddresses, 5, 1, false);
271 	for (int32 i = 0; i < depth; i++) {
272 		if (returnAddresses[i] < (addr_t)&get_caller
273 			|| returnAddresses[i] > (addr_t)&malloc_referenced_release) {
274 			return returnAddresses[i];
275 		}
276 	}
277 
278 	return 0;
279 }
280 #endif
281 
282 
283 static void
284 dump_page(heap_page *page)
285 {
286 	uint32 count = 0;
287 	for (addr_t *temp = page->free_list; temp != NULL; temp = (addr_t *)*temp)
288 		count++;
289 
290 	dprintf("\t\tpage %p: bin_index: %u; free_count: %u; empty_index: %u; free_list %p (%lu entr%s)\n",
291 		page, page->bin_index, page->free_count, page->empty_index,
292 		page->free_list, count, count == 1 ? "y" : "ies");
293 }
294 
295 
296 static void
297 dump_bin(heap_bin *bin)
298 {
299 	dprintf("\telement_size: %lu; max_free_count: %u; page_list %p;\n",
300 		bin->element_size, bin->max_free_count, bin->page_list);
301 
302 	for (heap_page *temp = bin->page_list; temp != NULL; temp = temp->next)
303 		dump_page(temp);
304 }
305 
306 
307 static void
308 dump_bin_list(heap_allocator *heap)
309 {
310 	for (uint32 i = 0; i < heap->bin_count; i++)
311 		dump_bin(&heap->bins[i]);
312 	dprintf("\n");
313 }
314 
315 
316 static void
317 dump_allocator_areas(heap_allocator *heap)
318 {
319 	heap_area *area = heap->all_areas;
320 	while (area) {
321 		dprintf("\tarea %p: area: %ld; base: 0x%08lx; size: %lu; page_count: %lu; free_pages: %p (%lu entr%s)\n",
322 			area, area->area, area->base, area->size, area->page_count,
323 			area->free_pages, area->free_page_count,
324 			area->free_page_count == 1 ? "y" : "ies");
325 		area = area->all_next;
326 	}
327 
328 	dprintf("\n");
329 }
330 
331 
332 static void
333 dump_allocator(heap_allocator *heap, bool areas, bool bins)
334 {
335 	dprintf("allocator %p: name: %s; page_size: %lu; bin_count: %lu; pages: %lu; free_pages: %lu; empty_areas: %lu\n", heap,
336 		heap->name, heap->page_size, heap->bin_count, heap->total_pages,
337 		heap->total_free_pages, heap->empty_areas);
338 
339 	if (areas)
340 		dump_allocator_areas(heap);
341 	if (bins)
342 		dump_bin_list(heap);
343 }
344 
345 
346 static int
347 dump_heap_list(int argc, char **argv)
348 {
349 	if (argc == 2) {
350 		uint64 heapAddress;
351 
352 		if (strcmp(argv[1], "grow") == 0) {
353 			// only dump dedicated grow heap info
354 			dprintf("dedicated grow heap:\n");
355 			dump_allocator(sGrowHeap, true, true);
356 		} else if (strcmp(argv[1], "stats") == 0) {
357 			for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++)
358 				dump_allocator(sHeaps[i], false, false);
359 		} else if (evaluate_debug_expression(argv[1], &heapAddress, true)) {
360 			dump_allocator((heap_allocator*)(addr_t)heapAddress, true, true);
361 		} else
362 			print_debugger_command_usage(argv[0]);
363 
364 		return 0;
365 	}
366 
367 	for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++)
368 		dump_allocator(sHeaps[i], true, true);
369 
370 	return 0;
371 }
372 
373 
374 #if KERNEL_HEAP_LEAK_CHECK
375 
376 static int
377 dump_allocations(int argc, char **argv)
378 {
379 	team_id team = -1;
380 	thread_id thread = -1;
381 	addr_t caller = 0;
382 	bool statsOnly = false;
383 	for (int32 i = 1; i < argc; i++) {
384 		if (strcmp(argv[i], "team") == 0)
385 			team = strtoul(argv[++i], NULL, 0);
386 		else if (strcmp(argv[i], "thread") == 0)
387 			thread = strtoul(argv[++i], NULL, 0);
388 		else if (strcmp(argv[i], "caller") == 0)
389 			caller = strtoul(argv[++i], NULL, 0);
390 		else if (strcmp(argv[i], "stats") == 0)
391 			statsOnly = true;
392 		else {
393 			print_debugger_command_usage(argv[0]);
394 			return 0;
395 		}
396 	}
397 
398 	size_t totalSize = 0;
399 	uint32 totalCount = 0;
400 	for (uint32 classIndex = 0; classIndex < HEAP_CLASS_COUNT; classIndex++) {
401 		heap_allocator *heap = sHeaps[classIndex];
402 
403 		// go through all the pages in all the areas
404 		heap_area *area = heap->all_areas;
405 		while (area) {
406 			heap_leak_check_info *info = NULL;
407 			for (uint32 i = 0; i < area->page_count; i++) {
408 				heap_page *page = &area->page_table[i];
409 				if (!page->in_use)
410 					continue;
411 
412 				addr_t base = area->base + i * heap->page_size;
413 				if (page->bin_index < heap->bin_count) {
414 					// page is used by a small allocation bin
415 					uint32 elementCount = page->empty_index;
416 					size_t elementSize = heap->bins[page->bin_index].element_size;
417 					for (uint32 j = 0; j < elementCount; j++, base += elementSize) {
418 						// walk the free list to see if this element is in use
419 						bool elementInUse = true;
420 						for (addr_t *temp = page->free_list; temp != NULL; temp = (addr_t *)*temp) {
421 							if ((addr_t)temp == base) {
422 								elementInUse = false;
423 								break;
424 							}
425 						}
426 
427 						if (!elementInUse)
428 							continue;
429 
430 						info = (heap_leak_check_info *)(base + elementSize
431 							- sizeof(heap_leak_check_info));
432 
433 						if ((team == -1 || info->team == team)
434 							&& (thread == -1 || info->thread == thread)
435 							&& (caller == 0 || info->caller == caller)) {
436 							// interesting...
437 							if (!statsOnly) {
438 								dprintf("team: % 6ld; thread: % 6ld; "
439 									"address: 0x%08lx; size: %lu bytes; "
440 									"caller: %#lx\n", info->team, info->thread,
441 									base, info->size, info->caller);
442 							}
443 
444 							totalSize += info->size;
445 							totalCount++;
446 						}
447 					}
448 				} else {
449 					// page is used by a big allocation, find the page count
450 					uint32 pageCount = 1;
451 					while (i + pageCount < area->page_count
452 						&& area->page_table[i + pageCount].in_use
453 						&& area->page_table[i + pageCount].bin_index == heap->bin_count
454 						&& area->page_table[i + pageCount].allocation_id == page->allocation_id)
455 						pageCount++;
456 
457 					info = (heap_leak_check_info *)(base + pageCount
458 						* heap->page_size - sizeof(heap_leak_check_info));
459 
460 					if ((team == -1 || info->team == team)
461 						&& (thread == -1 || info->thread == thread)
462 						&& (caller == 0 || info->caller == caller)) {
463 						// interesting...
464 						if (!statsOnly) {
465 							dprintf("team: % 6ld; thread: % 6ld;"
466 								" address: 0x%08lx; size: %lu bytes;"
467 								" caller: %#lx\n", info->team, info->thread,
468 								base, info->size, info->caller);
469 						}
470 
471 						totalSize += info->size;
472 						totalCount++;
473 					}
474 
475 					// skip the allocated pages
476 					i += pageCount - 1;
477 				}
478 			}
479 
480 			area = area->all_next;
481 		}
482 	}
483 
484 	dprintf("total allocations: %lu; total bytes: %lu\n", totalCount, totalSize);
485 	return 0;
486 }
487 
488 
489 static caller_info*
490 get_caller_info(addr_t caller)
491 {
492 	// find the caller info
493 	for (int32 i = 0; i < sCallerInfoCount; i++) {
494 		if (caller == sCallerInfoTable[i].caller)
495 			return &sCallerInfoTable[i];
496 	}
497 
498 	// not found; add a new entry if there are free slots
499 	if (sCallerInfoCount >= kCallerInfoTableSize)
500 		return NULL;
501 
502 	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
503 	info->caller = caller;
504 	info->count = 0;
505 	info->size = 0;
506 
507 	return info;
508 }
509 
510 
511 static int
512 caller_info_compare_size(const void* _a, const void* _b)
513 {
514 	const caller_info* a = (const caller_info*)_a;
515 	const caller_info* b = (const caller_info*)_b;
516 	return (int)(b->size - a->size);
517 }
518 
519 
520 static int
521 caller_info_compare_count(const void* _a, const void* _b)
522 {
523 	const caller_info* a = (const caller_info*)_a;
524 	const caller_info* b = (const caller_info*)_b;
525 	return (int)(b->count - a->count);
526 }
527 
528 
529 static bool
530 analyze_allocation_callers(heap_allocator* heap)
531 {
532 	// go through all the pages in all the areas
533 	heap_area *area = heap->all_areas;
534 	while (area) {
535 		heap_leak_check_info *info = NULL;
536 		for (uint32 i = 0; i < area->page_count; i++) {
537 			heap_page *page = &area->page_table[i];
538 			if (!page->in_use)
539 				continue;
540 
541 			addr_t base = area->base + i * heap->page_size;
542 			if (page->bin_index < heap->bin_count) {
543 				// page is used by a small allocation bin
544 				uint32 elementCount = page->empty_index;
545 				size_t elementSize = heap->bins[page->bin_index].element_size;
546 				for (uint32 j = 0; j < elementCount; j++, base += elementSize) {
547 					// walk the free list to see if this element is in use
548 					bool elementInUse = true;
549 					for (addr_t *temp = page->free_list; temp != NULL;
550 						temp = (addr_t *)*temp) {
551 						if ((addr_t)temp == base) {
552 							elementInUse = false;
553 							break;
554 						}
555 					}
556 
557 					if (!elementInUse)
558 						continue;
559 
560 					info = (heap_leak_check_info *)(base + elementSize
561 						- sizeof(heap_leak_check_info));
562 
563 					caller_info* callerInfo = get_caller_info(info->caller);
564 					if (callerInfo == NULL) {
565 						kprintf("out of space for caller infos\n");
566 						return false;
567 					}
568 
569 					callerInfo->count++;
570 					callerInfo->size += info->size;
571 				}
572 			} else {
573 				// page is used by a big allocation, find the page count
574 				uint32 pageCount = 1;
575 				while (i + pageCount < area->page_count
576 					&& area->page_table[i + pageCount].in_use
577 					&& area->page_table[i + pageCount].bin_index
578 						== heap->bin_count
579 					&& area->page_table[i + pageCount].allocation_id
580 						== page->allocation_id) {
581 					pageCount++;
582 				}
583 
584 				info = (heap_leak_check_info *)(base + pageCount
585 					* heap->page_size - sizeof(heap_leak_check_info));
586 
587 				caller_info* callerInfo = get_caller_info(info->caller);
588 				if (callerInfo == NULL) {
589 					kprintf("out of space for caller infos\n");
590 					return false;
591 				}
592 
593 				callerInfo->count++;
594 				callerInfo->size += info->size;
595 
596 				// skip the allocated pages
597 				i += pageCount - 1;
598 			}
599 		}
600 
601 		area = area->all_next;
602 	}
603 
604 	return true;
605 }
606 
607 
608 static int
609 dump_allocations_per_caller(int argc, char **argv)
610 {
611 	bool sortBySize = true;
612 	heap_allocator* heap = NULL;
613 
614 	for (int32 i = 1; i < argc; i++) {
615 		if (strcmp(argv[i], "-c") == 0) {
616 			sortBySize = false;
617 		} else if (strcmp(argv[i], "-h") == 0) {
618 			uint64 heapAddress;
619 			if (++i >= argc
620 				|| !evaluate_debug_expression(argv[i], &heapAddress, true)) {
621 				print_debugger_command_usage(argv[0]);
622 				return 0;
623 			}
624 
625 			heap = (heap_allocator*)(addr_t)heapAddress;
626 		} else {
627 			print_debugger_command_usage(argv[0]);
628 			return 0;
629 		}
630 	}
631 
632 	sCallerInfoCount = 0;
633 
634 	if (heap != NULL) {
635 		if (!analyze_allocation_callers(heap))
636 			return 0;
637 	} else {
638 		for (uint32 classIndex = 0; classIndex < HEAP_CLASS_COUNT;
639 				classIndex++) {
640 			if (!analyze_allocation_callers(sHeaps[classIndex]))
641 				return 0;
642 		}
643 	}
644 
645 	// sort the array
646 	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
647 		sortBySize ? &caller_info_compare_size : &caller_info_compare_count);
648 
649 	kprintf("%ld different callers, sorted by %s...\n\n", sCallerInfoCount,
650 		sortBySize ? "size" : "count");
651 
652 	kprintf("     count        size      caller\n");
653 	kprintf("----------------------------------\n");
654 	for (int32 i = 0; i < sCallerInfoCount; i++) {
655 		caller_info& info = sCallerInfoTable[i];
656 		kprintf("%10ld  %10ld  %#08lx", info.count, info.size, info.caller);
657 
658 		const char* symbol;
659 		const char* imageName;
660 		bool exactMatch;
661 		addr_t baseAddress;
662 
663 		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
664 				&imageName, &exactMatch) == B_OK) {
665 			kprintf("  %s + 0x%lx (%s)%s\n", symbol,
666 				info.caller - baseAddress, imageName,
667 				exactMatch ? "" : " (nearest)");
668 		} else
669 			kprintf("\n");
670 	}
671 
672 	return 0;
673 }
674 
675 #endif // KERNEL_HEAP_LEAK_CHECK
676 
677 
678 #if PARANOID_HEAP_VALIDATION
679 static void
680 heap_validate_heap(heap_allocator *heap)
681 {
682 	mutex_lock(&heap->lock);
683 
684 	uint32 totalPageCount = 0;
685 	uint32 totalFreePageCount = 0;
686 	heap_area *area = heap->all_areas;
687 	while (area != NULL) {
688 		// validate the free pages list
689 		uint32 freePageCount = 0;
690 		heap_page *lastPage = NULL;
691 		heap_page *page = area->free_pages;
692 		while (page) {
693 			if ((addr_t)page < (addr_t)&area->page_table[0]
694 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
695 				panic("free page is not part of the page table\n");
696 
697 			if (page->index >= area->page_count)
698 				panic("free page has invalid index\n");
699 
700 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
701 				panic("free page index does not lead to target page\n");
702 
703 			if (page->prev != lastPage)
704 				panic("free page entry has invalid prev link\n");
705 
706 			if (page->in_use)
707 				panic("free page marked as in use\n");
708 
709 			lastPage = page;
710 			page = page->next;
711 			freePageCount++;
712 		}
713 
714 		totalPageCount += freePageCount;
715 		totalFreePageCount += freePageCount;
716 		if (area->free_page_count != freePageCount)
717 			panic("free page count doesn't match free page list\n");
718 
719 		// validate the page table
720 		uint32 usedPageCount = 0;
721 		for (uint32 i = 0; i < area->page_count; i++) {
722 			if (area->page_table[i].in_use)
723 				usedPageCount++;
724 		}
725 
726 		totalPageCount += usedPageCount;
727 		if (freePageCount + usedPageCount != area->page_count) {
728 			panic("free pages and used pages do not add up (%lu + %lu != %lu)\n",
729 				freePageCount, usedPageCount, area->page_count);
730 		}
731 
732 		area = area->all_next;
733 	}
734 
735 	// validate the areas
736 	area = heap->areas;
737 	heap_area *lastArea = NULL;
738 	uint32 lastFreeCount = 0;
739 	while (area != NULL) {
740 		if (area->free_page_count < lastFreeCount)
741 			panic("size ordering of area list broken\n");
742 
743 		if (area->prev != lastArea)
744 			panic("area list entry has invalid prev link\n");
745 
746 		lastArea = area;
747 		lastFreeCount = area->free_page_count;
748 		area = area->next;
749 	}
750 
751 	lastArea = NULL;
752 	area = heap->all_areas;
753 	while (area != NULL) {
754 		if (lastArea != NULL && lastArea->base < area->base)
755 			panic("base ordering of all_areas list broken\n");
756 
757 		lastArea = area;
758 		area = area->all_next;
759 	}
760 
761 	// validate the bins
762 	for (uint32 i = 0; i < heap->bin_count; i++) {
763 		heap_bin *bin = &heap->bins[i];
764 
765 		heap_page *lastPage = NULL;
766 		heap_page *page = bin->page_list;
767 		lastFreeCount = 0;
768 		while (page) {
769 			area = heap->all_areas;
770 			while (area) {
771 				if (area == page->area)
772 					break;
773 				area = area->all_next;
774 			}
775 
776 			if (area == NULL) {
777 				panic("page area not present in area list\n");
778 				page = page->next;
779 				continue;
780 			}
781 
782 			if ((addr_t)page < (addr_t)&area->page_table[0]
783 				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
784 				panic("used page is not part of the page table\n");
785 
786 			if (page->index >= area->page_count)
787 				panic("used page has invalid index\n");
788 
789 			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
790 				panic("used page index does not lead to target page\n");
791 
792 			if (page->prev != lastPage)
793 				panic("used page entry has invalid prev link (%p vs %p bin %lu)\n",
794 					page->prev, lastPage, i);
795 
796 			if (!page->in_use)
797 				panic("used page marked as not in use\n");
798 
799 			if (page->bin_index != i)
800 				panic("used page with bin index %u in page list of bin %lu\n",
801 					page->bin_index, i);
802 
803 			if (page->free_count < lastFreeCount)
804 				panic("ordering of bin page list broken\n");
805 
806 			// validate the free list
807 			uint32 freeSlotsCount = 0;
808 			addr_t *element = page->free_list;
809 			addr_t pageBase = area->base + page->index * heap->page_size;
810 			while (element) {
811 				if ((addr_t)element < pageBase
812 					|| (addr_t)element >= pageBase + heap->page_size)
813 					panic("free list entry out of page range\n");
814 
815 				if (((addr_t)element - pageBase) % bin->element_size != 0)
816 					panic("free list entry not on an element boundary\n");
817 
818 				element = (addr_t *)*element;
819 				freeSlotsCount++;
820 			}
821 
822 			uint32 slotCount = bin->max_free_count;
823 			if (page->empty_index > slotCount)
824 				panic("empty index beyond slot count (%u with %lu slots)\n",
825 					page->empty_index, slotCount);
826 
827 			freeSlotsCount += (slotCount - page->empty_index);
828 			if (freeSlotsCount > slotCount)
829 				panic("more free slots than fit into the page\n");
830 
831 			lastPage = page;
832 			lastFreeCount = page->free_count;
833 			page = page->next;
834 		}
835 	}
836 
837 	mutex_unlock(&heap->lock);
838 }
839 #endif // PARANOID_HEAP_VALIDATION
840 
841 
842 // #pragma mark - Heap functions
843 
844 
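// Carve up a raw memory range for use by the given heap: the heap_area
// header is placed at the start, followed by the page table, and whatever
// remains (rounded to B_PAGE_SIZE) becomes the pages handed out by the
// allocator. The new, completely empty area is appended to the end of the
// area list and inserted into the all_areas list, which is kept ordered by
// descending base address.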
845 static void
846 heap_add_area(heap_allocator *heap, area_id areaID, addr_t base, size_t size)
847 {
848 	mutex_lock(&heap->lock);
849 
850 	heap_area *area = (heap_area *)base;
851 	area->area = areaID;
852 
853 	base += sizeof(heap_area);
854 	size -= sizeof(heap_area);
855 
856 	uint32 pageCount = size / heap->page_size;
857 	size_t pageTableSize = pageCount * sizeof(heap_page);
858 	area->page_table = (heap_page *)base;
859 	base += pageTableSize;
860 	size -= pageTableSize;
861 
862 	// the rest is now actually usable memory (rounded to the next page)
863 	area->base = ROUNDUP(base, B_PAGE_SIZE);
864 	area->size = size & ~(B_PAGE_SIZE - 1);
865 
866 	// now we know the real page count
867 	pageCount = area->size / heap->page_size;
868 	area->page_count = pageCount;
869 
870 	// zero out the page table and fill in page indexes
871 	memset((void *)area->page_table, 0, pageTableSize);
872 	for (uint32 i = 0; i < pageCount; i++) {
873 		area->page_table[i].area = area;
874 		area->page_table[i].index = i;
875 	}
876 
877 	// link all the pages into the free pages list
878 	for (uint32 i = 1; i < pageCount; i++) {
879 		area->page_table[i - 1].next = &area->page_table[i];
880 		area->page_table[i].prev = &area->page_table[i - 1];
881 	}
882 	area->free_pages = &area->page_table[0];
883 	area->free_page_count = pageCount;
884 	area->page_table[0].prev = NULL;
885 
886 	area->next = NULL;
887 	if (heap->areas == NULL) {
888 		// it's the only (empty) area in that heap
889 		area->prev = NULL;
890 		heap->areas = area;
891 	} else {
892 		// link in this area as the last one as it is completely empty
893 		heap_area *lastArea = heap->areas;
894 		while (lastArea->next != NULL)
895 			lastArea = lastArea->next;
896 
897 		lastArea->next = area;
898 		area->prev = lastArea;
899 	}
900 
901 	// insert this area in the all_areas list so it stays ordered by base
902 	if (heap->all_areas == NULL || heap->all_areas->base < area->base) {
903 		area->all_next = heap->all_areas;
904 		heap->all_areas = area;
905 	} else {
906 		heap_area *insert = heap->all_areas;
907 		while (insert->all_next && insert->all_next->base > area->base)
908 			insert = insert->all_next;
909 
910 		area->all_next = insert->all_next;
911 		insert->all_next = area;
912 	}
913 
914 	heap->total_pages += area->page_count;
915 	heap->total_free_pages += area->free_page_count;
916 
917 	if (areaID >= B_OK) {
918 		// this area can be deleted later and is still empty - the empty count
919 		// will be decremented as soon as this area is used for the first time
920 		heap->empty_areas++;
921 	}
922 
923 	mutex_unlock(&heap->lock);
924 
925 	dprintf("heap_add_area: area %ld added to %s heap %p - usable range 0x%08lx - 0x%08lx\n",
926 		area->area, heap->name, heap, area->base, area->base + area->size);
927 }
928 
929 
930 static status_t
931 heap_remove_area(heap_allocator *heap, heap_area *area, bool locked)
932 {
933 	if (!locked)
934 		mutex_lock(&heap->lock);
935 
936 	if (area->free_page_count != area->page_count) {
937 		panic("tried removing heap area that has still pages in use");
938 		return B_ERROR;
939 	}
940 
941 	if (area->prev == NULL && area->next == NULL) {
942 		panic("tried removing the last non-full heap area");
943 		return B_ERROR;
944 	}
945 
946 	if (heap->areas == area)
947 		heap->areas = area->next;
948 	if (area->prev != NULL)
949 		area->prev->next = area->next;
950 	if (area->next != NULL)
951 		area->next->prev = area->prev;
952 
953 	if (heap->all_areas == area)
954 		heap->all_areas = area->all_next;
955 	else {
956 		heap_area *previous = heap->all_areas;
957 		while (previous) {
958 			if (previous->all_next == area) {
959 				previous->all_next = area->all_next;
960 				break;
961 			}
962 
963 			previous = previous->all_next;
964 		}
965 
966 		if (previous == NULL)
967 			panic("removing heap area that is not in all list");
968 	}
969 
970 	heap->total_pages -= area->page_count;
971 	heap->total_free_pages -= area->free_page_count;
972 
973 	if (!locked)
974 		mutex_unlock(&heap->lock);
975 
976 	dprintf("heap_remove_area: area %ld with range 0x%08lx - 0x%08lx removed from %s heap %p\n",
977 		area->area, area->base, area->base + area->size, heap->name, heap);
978 	return B_OK;
979 }
980 
981 
982 heap_allocator *
983 heap_create_allocator(const char *name, addr_t base, size_t size,
984 	const heap_class *heapClass)
985 {
986 	heap_allocator *heap = (heap_allocator *)base;
987 	base += sizeof(heap_allocator);
988 	size -= sizeof(heap_allocator);
989 
990 	heap->name = name;
991 	heap->page_size = heapClass->page_size;
992 	heap->total_pages = heap->total_free_pages = heap->empty_areas = 0;
993 	heap->areas = heap->all_areas = NULL;
994 	heap->bins = (heap_bin *)base;
995 
996 #if KERNEL_HEAP_LEAK_CHECK
997 	heap->get_caller = &get_caller;
998 #endif
999 
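	// Derive the bin sizes from the heap class: starting with the highest
	// possible element count per page, compute the aligned element size for
	// each count and keep it as a bin unless it duplicates the previous size
	// or wastes more than max_waste_per_page bytes of a page. Assuming
	// 4096 byte pages, the "small" class (8 byte minimum, 4 byte alignment)
	// for example yields element sizes 8, 12, 16, ... up to 512 bytes.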
1000 	heap->bin_count = 0;
1001 	size_t binSize = 0, lastSize = 0;
1002 	uint32 count = heap->page_size / heapClass->min_bin_size;
1003 	for (; count >= heapClass->min_count_per_page; count--, lastSize = binSize) {
1004 		binSize = (heap->page_size / count) & ~(heapClass->bin_alignment - 1);
1005 		if (binSize == lastSize)
1006 			continue;
1007 		if (heap->page_size - count * binSize > heapClass->max_waste_per_page)
1008 			continue;
1009 
1010 		heap_bin *bin = &heap->bins[heap->bin_count];
1011 		bin->element_size = binSize;
1012 		bin->max_free_count = heap->page_size / binSize;
1013 		bin->page_list = NULL;
1014 		heap->bin_count++;
1015 
1016 		if (heap->bin_count >= 32)
1017 			panic("heap configuration invalid - max bin count reached\n");
1018 	}
1019 
1020 	base += heap->bin_count * sizeof(heap_bin);
1021 	size -= heap->bin_count * sizeof(heap_bin);
1022 
1023 	mutex_init(&heap->lock, "heap_mutex");
1024 
1025 	heap_add_area(heap, -1, base, size);
1026 	return heap;
1027 }
1028 
1029 
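// Bookkeeping helpers for the per-heap area list. The list is kept sorted by
// ascending free_page_count, so allocations always hit the most used area
// first and lightly used areas get the chance to drain completely and be
// deleted again. heap_free_pages_added() returns the id of an area that has
// become empty and removable, so the caller can delete it once the heap lock
// is no longer held, or -1 if there is nothing to delete.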
1030 static inline area_id
1031 heap_free_pages_added(heap_allocator *heap, heap_area *area, uint32 pageCount)
1032 {
1033 	area->free_page_count += pageCount;
1034 	heap->total_free_pages += pageCount;
1035 
1036 	if (area->free_page_count == pageCount) {
1037 		// we need to add ourselves to the area list of the heap
1038 		area->prev = NULL;
1039 		area->next = heap->areas;
1040 		if (area->next)
1041 			area->next->prev = area;
1042 		heap->areas = area;
1043 	} else {
1044 		// we might need to move back in the area list
1045 		if (area->next && area->next->free_page_count < area->free_page_count) {
1046 			// move ourselves so the list stays ordered
1047 			heap_area *insert = area->next;
1048 			while (insert->next
1049 				&& insert->next->free_page_count < area->free_page_count)
1050 				insert = insert->next;
1051 
1052 			if (area->prev)
1053 				area->prev->next = area->next;
1054 			if (area->next)
1055 				area->next->prev = area->prev;
1056 			if (heap->areas == area)
1057 				heap->areas = area->next;
1058 
1059 			area->prev = insert;
1060 			area->next = insert->next;
1061 			if (area->next)
1062 				area->next->prev = area;
1063 			insert->next = area;
1064 		}
1065 	}
1066 
1067 	// can and should we free this area?
1068 	if (area->free_page_count == area->page_count && area->area >= B_OK) {
1069 		if (heap->empty_areas > 0) {
1070 			// we already have at least another empty area, just free this one
1071 			if (heap_remove_area(heap, area, true) == B_OK) {
1072 				// we cannot delete the area here, because it would result
1073 				// in calls to free, which would necessarily deadlock as we
1074 				// are locked at this point
1075 				return area->area;
1076 			}
1077 		}
1078 
1079 		heap->empty_areas++;
1080 	}
1081 
1082 	return -1;
1083 }
1084 
1085 
1086 static inline void
1087 heap_free_pages_removed(heap_allocator *heap, heap_area *area, uint32 pageCount)
1088 {
1089 	if (area->free_page_count == area->page_count && area->area >= B_OK) {
1090 		// this area was completely empty
1091 		heap->empty_areas--;
1092 	}
1093 
1094 	area->free_page_count -= pageCount;
1095 	heap->total_free_pages -= pageCount;
1096 
1097 	if (area->free_page_count == 0) {
1098 		// the area is now full so we remove it from the area list
1099 		heap->areas = area->next;
1100 		if (area->next)
1101 			area->next->prev = NULL;
1102 		area->next = area->prev = NULL;
1103 	}
1104 }
1105 
1106 
1107 static inline void
1108 heap_link_page(heap_page *page, heap_page **list)
1109 {
1110 	page->prev = NULL;
1111 	page->next = *list;
1112 	if (page->next)
1113 		page->next->prev = page;
1114 	*list = page;
1115 }
1116 
1117 
1118 static inline void
1119 heap_unlink_page(heap_page *page, heap_page **list)
1120 {
1121 	if (page->prev)
1122 		page->prev->next = page->next;
1123 	if (page->next)
1124 		page->next->prev = page->prev;
1125 	if (list && *list == page) {
1126 		*list = page->next;
1127 		if (page->next)
1128 			page->next->prev = NULL;
1129 	}
1130 }
1131 
1132 
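// First-fit search for pageCount contiguous free pages. All pages of the
// resulting run are marked in use with bin_index == bin_count and share the
// same allocation_id (the index of the first page), which is how heap_free()
// later finds the end of a multi page allocation.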
1133 static heap_page *
1134 heap_allocate_contiguous_pages(heap_allocator *heap, uint32 pageCount)
1135 {
1136 	heap_area *area = heap->areas;
1137 	while (area) {
1138 		bool found = false;
1139 		int32 first = -1;
1140 		for (uint32 i = 0; i < area->page_count; i++) {
1141 			if (area->page_table[i].in_use) {
1142 				first = -1;
1143 				continue;
1144 			}
1145 
1146 			if (first >= 0) {
1147 				if ((i + 1 - first) == pageCount) {
1148 					found = true;
1149 					break;
1150 				}
1151 			} else
1152 				first = i;
1153 		}
1154 
1155 		if (!found) {
1156 			area = area->next;
1157 			continue;
1158 		}
1159 
1160 		for (uint32 i = first; i < first + pageCount; i++) {
1161 			heap_page *page = &area->page_table[i];
1162 			page->in_use = 1;
1163 			page->bin_index = heap->bin_count;
1164 
1165 			heap_unlink_page(page, &area->free_pages);
1166 
1167 			page->next = page->prev = NULL;
1168 			page->free_list = NULL;
1169 			page->allocation_id = (uint16)first;
1170 		}
1171 
1172 		heap_free_pages_removed(heap, area, pageCount);
1173 		return &area->page_table[first];
1174 	}
1175 
1176 	return NULL;
1177 }
1178 
1179 
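// Core allocation routine, called with the heap lock held. Small requests
// (binIndex < bin_count) are served from the first page of the matching bin;
// freed slots of a page form a singly linked free list threaded through the
// free memory itself, i.e. the first word of a free slot stores the address
// of the next free slot. Requests that do not fit any bin fall through to
// the contiguous page allocation below.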
1180 static void *
1181 heap_raw_alloc(heap_allocator *heap, size_t size, uint32 binIndex)
1182 {
1183 	if (binIndex < heap->bin_count) {
1184 		heap_bin *bin = &heap->bins[binIndex];
1185 		heap_page *page = bin->page_list;
1186 		if (page == NULL) {
1187 			heap_area *area = heap->areas;
1188 			if (area == NULL) {
1189 				TRACE(("heap %p: no free pages to allocate %lu bytes\n", heap,
1190 					size));
1191 				return NULL;
1192 			}
1193 
1194 			// by design there are only areas in the list that still have
1195 			// free pages available
1196 			page = area->free_pages;
1197 			area->free_pages = page->next;
1198 			if (page->next)
1199 				page->next->prev = NULL;
1200 
1201 			heap_free_pages_removed(heap, area, 1);
1202 			page->in_use = 1;
1203 			page->bin_index = binIndex;
1204 			page->free_count = bin->max_free_count;
1205 			page->empty_index = 0;
1206 			page->free_list = NULL;
1207 			page->next = page->prev = NULL;
1208 			bin->page_list = page;
1209 		}
1210 
1211 		// we have a page where we have a free slot
1212 		void *address = NULL;
1213 		if (page->free_list) {
1214 			// there's a previously freed entry we can use
1215 			address = page->free_list;
1216 			page->free_list = (addr_t *)*page->free_list;
1217 		} else {
1218 			// the page hasn't been fully allocated so use the next empty_index
1219 			address = (void *)(page->area->base + page->index * heap->page_size
1220 				+ page->empty_index * bin->element_size);
1221 			page->empty_index++;
1222 		}
1223 
1224 		page->free_count--;
1225 		if (page->free_count == 0) {
1226 			// the page is now full so we remove it from the page_list
1227 			bin->page_list = page->next;
1228 			if (page->next)
1229 				page->next->prev = NULL;
1230 			page->next = page->prev = NULL;
1231 		}
1232 
1233 #if KERNEL_HEAP_LEAK_CHECK
1234 		heap_leak_check_info *info = (heap_leak_check_info *)((addr_t)address
1235 			+ bin->element_size - sizeof(heap_leak_check_info));
1236 		info->size = size - sizeof(heap_leak_check_info);
1237 		info->thread = (gKernelStartup ? 0 : thread_get_current_thread_id());
1238 		info->team = (gKernelStartup ? 0 : team_get_current_team_id());
1239 		info->caller = heap->get_caller();
1240 #endif
1241 		return address;
1242 	}
1243 
1244 	uint32 pageCount = (size + heap->page_size - 1) / heap->page_size;
1245 	heap_page *firstPage = heap_allocate_contiguous_pages(heap, pageCount);
1246 	if (firstPage == NULL) {
1247 		TRACE(("heap %p: found no contiguous pages to allocate %ld bytes\n",
1248 			heap, size));
1249 		return NULL;
1250 	}
1251 
1252 #if KERNEL_HEAP_LEAK_CHECK
1253 	heap_leak_check_info *info = (heap_leak_check_info *)(firstPage->area->base
1254 		+ (firstPage->index + pageCount) * heap->page_size
1255 		- sizeof(heap_leak_check_info));
1256 	info->size = size - sizeof(heap_leak_check_info);
1257 	info->thread = (gKernelStartup ? 0 : thread_get_current_thread_id());
1258 	info->team = (gKernelStartup ? 0 : team_get_current_team_id());
1259 	info->caller = heap->get_caller();
1260 #endif
1261 	return (void *)(firstPage->area->base + firstPage->index * heap->page_size);
1262 }
1263 
1264 
1265 #if DEBUG
1266 static bool
1267 is_valid_alignment(size_t number)
1268 {
1269 	// this cryptic line accepts zero and all powers of two
1270 	return ((~number + 1) | ((number << 1) - 1)) == ~0UL;
1271 }
1272 #endif
1273 
1274 
1275 inline bool
1276 heap_should_grow(heap_allocator *heap)
1277 {
1278 	// suggest growing if there is less than 20% of a grow size available
1279 	return heap->total_free_pages * heap->page_size < HEAP_GROW_SIZE / 5;
1280 }
1281 
1282 
1283 void *
1284 heap_memalign(heap_allocator *heap, size_t alignment, size_t size)
1285 {
1286 	TRACE(("memalign(alignment = %lu, size = %lu)\n", alignment, size));
1287 
1288 #if DEBUG
1289 	if (!is_valid_alignment(alignment))
1290 		panic("memalign() with an alignment which is not a power of 2\n");
1291 #endif
1292 
1293 	mutex_lock(&heap->lock);
1294 
1295 #if KERNEL_HEAP_LEAK_CHECK
1296 	size += sizeof(heap_leak_check_info);
1297 #endif
1298 
1299 	if (alignment != 0) {
1300 		// TODO: The alignment is done by ensuring that the element size
1301 		// of the target bin is aligned with the requested alignment. This
1302 		// has the problem that it wastes space because a better (smaller) bin
1303 		// could possibly be selected. We should pick the best bin and check
1304 		// if there is an aligned block in the free list or if a new (page
1305 		// aligned) page has to be allocated anyway.
1306 		size = ROUNDUP(size, alignment);
1307 	}
1308 
1309 	uint32 binIndex;
1310 	for (binIndex = 0; binIndex < heap->bin_count; binIndex++) {
1311 		if (size <= heap->bins[binIndex].element_size)
1312 			break;
1313 	}
1314 
1315 	void *address = heap_raw_alloc(heap, size, binIndex);
1316 
1317 	TRACE(("memalign(): asked to allocate %lu bytes, returning pointer %p\n", size, address));
1318 
1319 #if KERNEL_HEAP_LEAK_CHECK
1320 	size -= sizeof(heap_leak_check_info);
1321 #endif
1322 
1323 	T(Allocate((addr_t)address, size));
1324 	mutex_unlock(&heap->lock);
1325 	if (address == NULL)
1326 		return address;
1327 
1328 #if PARANOID_KERNEL_FREE
1329 	// make sure 0xdeadbeef is cleared if we do not overwrite the memory
1330 	// and the user does not clear it
1331 	if (((uint32 *)address)[1] == 0xdeadbeef)
1332 		((uint32 *)address)[1] = 0xcccccccc;
1333 #endif
1334 
1335 #if PARANOID_KERNEL_MALLOC
1336 	memset(address, 0xcc, size);
1337 #endif
1338 
1339 	return address;
1340 }
1341 
1342 
1343 status_t
1344 heap_free(heap_allocator *heap, void *address)
1345 {
1346 	if (address == NULL)
1347 		return B_OK;
1348 
1349 	mutex_lock(&heap->lock);
1350 
1351 	heap_area *area = heap->all_areas;
1352 	while (area) {
1353 		// since the all_areas list is ordered by base with the biggest
1354 		// base at the top, we need only find the first area with a base
1355 		// not larger than our address to become our only candidate for freeing
1356 		if (area->base <= (addr_t)address) {
1357 			if ((addr_t)address >= area->base + area->size) {
1358 				// the only candidate area doesn't contain the address,
1359 				// set it to NULL so we return below (none of the other areas
1360 				// can contain the address as the list is ordered)
1361 				area = NULL;
1362 			}
1363 
1364 			break;
1365 		}
1366 
1367 		area = area->all_next;
1368 	}
1369 
1370 	if (area == NULL) {
1371 		// this address does not belong to us
1372 		mutex_unlock(&heap->lock);
1373 		return B_ENTRY_NOT_FOUND;
1374 	}
1375 
1376 	TRACE(("free(): asked to free pointer %p\n", address));
1377 
1378 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1379 		/ heap->page_size];
1380 
1381 	TRACE(("free(): page %p: bin_index %d, free_count %d\n", page, page->bin_index, page->free_count));
1382 
1383 	if (page->bin_index > heap->bin_count) {
1384 		panic("free(): page %p: invalid bin_index %d\n", page, page->bin_index);
1385 		mutex_unlock(&heap->lock);
1386 		return B_ERROR;
1387 	}
1388 
1389 	area_id areaToDelete = -1;
1390 	if (page->bin_index < heap->bin_count) {
1391 		// small allocation
1392 		heap_bin *bin = &heap->bins[page->bin_index];
1393 		if (((addr_t)address - area->base - page->index
1394 			* heap->page_size) % bin->element_size != 0) {
1395 			panic("free(): passed invalid pointer %p supposed to be in bin for element size %ld\n", address, bin->element_size);
1396 			mutex_unlock(&heap->lock);
1397 			return B_ERROR;
1398 		}
1399 
1400 #if PARANOID_KERNEL_FREE
1401 		if (((uint32 *)address)[1] == 0xdeadbeef) {
1402 			// This block looks like it was freed already, walk the free list
1403 			// on this page to make sure this address doesn't exist.
1404 			for (addr_t *temp = page->free_list; temp != NULL; temp = (addr_t *)*temp) {
1405 				if (temp == address) {
1406 					panic("free(): address %p already exists in page free list\n", address);
1407 					mutex_unlock(&heap->lock);
1408 					return B_ERROR;
1409 				}
1410 			}
1411 		}
1412 
1413 		uint32 *dead = (uint32 *)address;
1414 		if (bin->element_size % 4 != 0) {
1415 			panic("free(): didn't expect a bin element size that is not a multiple of 4\n");
1416 			mutex_unlock(&heap->lock);
1417 			return B_ERROR;
1418 		}
1419 
1420 		// the first 4 bytes are overwritten with the next free list pointer later
1421 		for (uint32 i = 1; i < bin->element_size / sizeof(uint32); i++)
1422 			dead[i] = 0xdeadbeef;
1423 #endif
1424 
1425 		// add the address to the page free list
1426 		*(addr_t *)address = (addr_t)page->free_list;
1427 		page->free_list = (addr_t *)address;
1428 		page->free_count++;
1429 
1430 		if (page->free_count == bin->max_free_count) {
1431 			// we are now empty, remove the page from the bin list
1432 			heap_unlink_page(page, &bin->page_list);
1433 			page->in_use = 0;
1434 			heap_link_page(page, &area->free_pages);
1435 			areaToDelete = heap_free_pages_added(heap, area, 1);
1436 		} else if (page->free_count == 1) {
1437 			// we need to add ourselfs to the page list of the bin
1438 			// we need to add ourselves to the page list of the bin
1439 		} else {
1440 			// we might need to move back in the bin's page list
1441 			if (page->next && page->next->free_count < page->free_count) {
1442 				// move ourselves so the list stays ordered
1443 				heap_page *insert = page->next;
1444 				while (insert->next
1445 					&& insert->next->free_count < page->free_count)
1446 					insert = insert->next;
1447 
1448 				heap_unlink_page(page, &bin->page_list);
1449 
1450 				page->prev = insert;
1451 				page->next = insert->next;
1452 				if (page->next)
1453 					page->next->prev = page;
1454 				insert->next = page;
1455 			}
1456 		}
1457 	} else {
1458 		// large allocation, just return the pages to the page free list
1459 		uint32 allocationID = page->allocation_id;
1460 		uint32 maxPages = area->page_count - page->index;
1461 		uint32 pageCount = 0;
1462 		for (uint32 i = 0; i < maxPages; i++) {
1463 			// loop until we find the end of this allocation
1464 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1465 				|| page[i].allocation_id != allocationID)
1466 				break;
1467 
1468 			// this page still belongs to the same allocation
1469 			page[i].in_use = 0;
1470 			page[i].allocation_id = 0;
1471 
1472 			// return it to the free list
1473 			heap_link_page(&page[i], &area->free_pages);
1474 			pageCount++;
1475 		}
1476 
1477 		areaToDelete = heap_free_pages_added(heap, area, pageCount);
1478 	}
1479 
1480 	T(Free((addr_t)address));
1481 	mutex_unlock(&heap->lock);
1482 
1483 	if (areaToDelete >= B_OK) {
1484 		// adding free pages caused an area to become empty and freeable that
1485 		// we can now delete as we don't hold the heap lock anymore
1486 		delete_area(areaToDelete);
1487 	}
1488 
1489 	return B_OK;
1490 }
1491 
1492 
1493 #if KERNEL_HEAP_LEAK_CHECK
1494 extern "C" void
1495 heap_set_get_caller(heap_allocator* heap, addr_t (*getCaller)())
1496 {
1497 	heap->get_caller = getCaller;
1498 }
1499 #endif
1500 
1501 
1502 static status_t
1503 heap_realloc(heap_allocator *heap, void *address, void **newAddress,
1504 	size_t newSize)
1505 {
1506 	mutex_lock(&heap->lock);
1507 	heap_area *area = heap->all_areas;
1508 	while (area) {
1509 		if ((addr_t)address >= area->base
1510 			&& (addr_t)address < area->base + area->size)
1511 			break;
1512 
1513 		area = area->all_next;
1514 	}
1515 
1516 	if (area == NULL) {
1517 		// this address does not belong to us
1518 		mutex_unlock(&heap->lock);
1519 		return B_ENTRY_NOT_FOUND;
1520 	}
1521 
1522 	TRACE(("realloc(address = %p, newSize = %lu)\n", address, newSize));
1523 
1524 	heap_page *page = &area->page_table[((addr_t)address - area->base)
1525 		/ heap->page_size];
1526 	if (page->bin_index > heap->bin_count) {
1527 		panic("realloc(): page %p: invalid bin_index %d\n", page, page->bin_index);
1528 		mutex_unlock(&heap->lock);
1529 		return B_ERROR;
1530 	}
1531 
1532 	// find out the size of the old allocation first
1533 	size_t minSize = 0;
1534 	size_t maxSize = 0;
1535 	if (page->bin_index < heap->bin_count) {
1536 		// this was a small allocation
1537 		heap_bin *bin = &heap->bins[page->bin_index];
1538 		maxSize = bin->element_size;
1539 		if (page->bin_index > 0)
1540 			minSize = heap->bins[page->bin_index - 1].element_size + 1;
1541 	} else {
1542 		// this was a large allocation
1543 		uint32 allocationID = page->allocation_id;
1544 		uint32 maxPages = area->page_count - page->index;
1545 		maxSize = heap->page_size;
1546 		for (uint32 i = 1; i < maxPages; i++) {
1547 			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1548 				|| page[i].allocation_id != allocationID)
1549 				break;
1550 
1551 			minSize += heap->page_size;
1552 			maxSize += heap->page_size;
1553 		}
1554 	}
1555 
1556 	mutex_unlock(&heap->lock);
1557 
1558 #if KERNEL_HEAP_LEAK_CHECK
1559 	newSize += sizeof(heap_leak_check_info);
1560 #endif
1561 
1562 	// does the new allocation simply fit in the old allocation?
1563 	if (newSize > minSize && newSize <= maxSize) {
1564 #if KERNEL_HEAP_LEAK_CHECK
1565 		// update the size info (the info is at the end so stays where it is)
1566 		heap_leak_check_info *info = (heap_leak_check_info *)((addr_t)address
1567 			+ maxSize - sizeof(heap_leak_check_info));
1568 		info->size = newSize - sizeof(heap_leak_check_info);
1569 		newSize -= sizeof(heap_leak_check_info);
1570 #endif
1571 
1572 		T(Reallocate((addr_t)address, (addr_t)address, newSize));
1573 		*newAddress = address;
1574 		return B_OK;
1575 	}
1576 
1577 #if KERNEL_HEAP_LEAK_CHECK
1578 	// new leak check info will be created with the malloc below
1579 	newSize -= sizeof(heap_leak_check_info);
1580 #endif
1581 
1582 	// if not, allocate a new chunk of memory
1583 	*newAddress = memalign(0, newSize);
1584 	T(Reallocate((addr_t)address, (addr_t)*newAddress, newSize));
1585 	if (*newAddress == NULL) {
1586 		// we tried, but it didn't work out - the old allocation stays intact
1587 		return B_OK;
1588 	}
1589 
1590 	// copy the old data and free the old allocation
1591 	memcpy(*newAddress, address, min_c(maxSize, newSize));
1592 	heap_free(heap, address);
1593 	return B_OK;
1594 }
1595 
1596 
1597 inline uint32
1598 heap_class_for(size_t size)
1599 {
1600 #if KERNEL_HEAP_LEAK_CHECK
1601 	// take the extra info size into account
1602 	size += sizeof(heap_leak_check_info_s);
1603 #endif
1604 
1605 	uint32 index = 0;
1606 	for (; index < HEAP_CLASS_COUNT - 1; index++) {
1607 		if (size <= sHeapClasses[index].max_allocation_size)
1608 			return index;
1609 	}
1610 
1611 	return index;
1612 }
1613 
1614 
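// Kernel daemon callback (registered in heap_init_post_thread()) that works
// through the deferred free list. The entries are first moved to an on-stack
// list while the spinlock is held, so that the actual free() calls run with
// interrupts enabled and without the spinlock held.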
1615 static void
1616 deferred_deleter(void *arg, int iteration)
1617 {
1618 	// move entries to on-stack list
1619 	InterruptsSpinLocker locker(sDeferredFreeListLock);
1620 	if (sDeferredFreeList.IsEmpty())
1621 		return;
1622 
1623 	DeferredFreeList entries;
1624 	entries.MoveFrom(&sDeferredFreeList);
1625 
1626 	locker.Unlock();
1627 
1628 	// free the entries
1629 	while (DeferredFreeListEntry* entry = entries.RemoveHead())
1630 		free(entry);
1631 }
1632 
1633 
1634 //	#pragma mark -
1635 
1636 
1637 static status_t
1638 heap_create_new_heap_area(heap_allocator *heap, const char *name, size_t size)
1639 {
1640 	void *address = NULL;
1641 	area_id heapArea = create_area(name, &address,
1642 		B_ANY_KERNEL_BLOCK_ADDRESS, size, B_FULL_LOCK,
1643 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1644 	if (heapArea < B_OK) {
1645 		TRACE(("heap: couldn't allocate heap area \"%s\"\n", name));
1646 		return heapArea;
1647 	}
1648 
1649 	heap_add_area(heap, heapArea, (addr_t)address, size);
1650 #if PARANOID_HEAP_VALIDATION
1651 	heap_validate_heap(heap);
1652 #endif
1653 	return B_OK;
1654 }
1655 
1656 
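// Dedicated grower thread. Creating new heap areas needs memory itself, so
// all growing is funneled through this thread, which satisfies its own
// allocations from the dedicated grow heap (see malloc_nogrow()). Callers
// request growth by releasing sHeapGrowSem; memalign() additionally does
//
//     sLastGrowRequest[heapClass]++;
//     switch_sem(sHeapGrowSem, sHeapGrownNotify);
//
// to wait for the grown notification when an allocation has failed outright.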
1657 static int32
1658 heap_grow_thread(void *)
1659 {
1660 	while (true) {
1661 		// wait for a request to grow the heap list
1662 		if (acquire_sem(sHeapGrowSem) < B_OK)
1663 			continue;
1664 
1665 		if (sAddGrowHeap) {
1666 			// the grow heap is going to run full soon, try to allocate a new
1667 			// one to make some room.
1668 			TRACE(("heap_grower: grow heaps will run out of memory soon\n"));
1669 			if (heap_create_new_heap_area(sGrowHeap, "additional grow heap",
1670 				HEAP_DEDICATED_GROW_SIZE) != B_OK)
1671 				dprintf("heap_grower: failed to create new grow heap area\n");
1672 		}
1673 
1674 		for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) {
1675 			heap_allocator *heap = sHeaps[i];
1676 			if (sLastGrowRequest[i] > sLastHandledGrowRequest[i]
1677 				|| heap_should_grow(heap)) {
1678 				// grow this heap if it is nearly full or if a grow was
1679 				// explicitly requested for this heap (happens when a large
1680 				// allocation cannot be fulfilled due to lack of contiguous
1681 				// pages)
1682 				if (heap_create_new_heap_area(heap, "additional heap",
1683 					HEAP_GROW_SIZE) != B_OK)
1684 					dprintf("heap_grower: failed to create new heap area\n");
1685 				sLastHandledGrowRequest[i] = sLastGrowRequest[i];
1686 			}
1687 		}
1688 
1689 		// notify anyone waiting for this request
1690 		release_sem_etc(sHeapGrownNotify, -1, B_RELEASE_ALL);
1691 	}
1692 
1693 	return 0;
1694 }
1695 
1696 
1697 status_t
1698 heap_init(addr_t base, size_t size)
1699 {
1700 	for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) {
1701 		size_t partSize = size * sHeapClasses[i].initial_percentage / 100;
1702 		sHeaps[i] = heap_create_allocator(sHeapClasses[i].name, base, partSize,
1703 			&sHeapClasses[i]);
1704 		sLastGrowRequest[i] = sLastHandledGrowRequest[i] = 0;
1705 		base += partSize;
1706 	}
1707 
1708 	// set up some debug commands
1709 	add_debugger_command_etc("heap", &dump_heap_list,
1710 		"Dump info about the kernel heap(s)",
1711 		"[(\"grow\" | \"stats\" | <heap>)]\n"
1712 		"Dump info about the kernel heap(s). If \"grow\" is specified, only\n"
1713 		"info about the dedicated grow heap is printed. If \"stats\" is\n"
1714 		"given as the argument, only a short summary of each heap is printed.\n"
1715 		"If <heap> is given, it is interpreted as the address of the heap to\n"
1716 		"print info about.\n", 0);
1717 #if KERNEL_HEAP_LEAK_CHECK
1718 	add_debugger_command_etc("allocations", &dump_allocations,
1719 		"Dump current allocations",
1720 		"[(\"team\" | \"thread\") <id>] [ \"caller\" <address> ] [\"stats\"]\n"
1721 		"If no parameters are given, all current allocations are dumped.\n"
1722 		"If \"team\", \"thread\", and/or \"caller\" is specified as the first\n"
1723 		"argument, only allocations matching the team id, thread id, or \n"
1724 		"caller address given in the second argument are printed.\n"
1725 		"If the optional argument \"stats\" is specified, only the allocation\n"
1726 		"counts and no individual allocations are printed\n", 0);
1727 	add_debugger_command_etc("allocations_per_caller",
1728 		&dump_allocations_per_caller,
1729 		"Dump current allocations summed up per caller",
1730 		"[ \"-c\" ] [ -h <heap> ]\n"
1731 		"The current allocations will be summed up by caller (their count and\n"
1732 		"size) and printed in decreasing order by size or, if \"-c\" is\n"
1733 		"specified, by allocation count. If given, <heap> specifies the\n"
1734 		"address of the heap for which to print the allocations.\n", 0);
1735 #endif
1736 	return B_OK;
1737 }
1738 
1739 
1740 status_t
1741 heap_init_post_sem()
1742 {
1743 	sHeapGrowSem = create_sem(0, "heap_grow_sem");
1744 	if (sHeapGrowSem < 0) {
1745 		panic("heap_init_post_sem(): failed to create heap grow sem\n");
1746 		return B_ERROR;
1747 	}
1748 
1749 	sHeapGrownNotify = create_sem(0, "heap_grown_notify");
1750 	if (sHeapGrownNotify < 0) {
1751 		panic("heap_init_post_sem(): failed to create heap grown notify sem\n");
1752 		return B_ERROR;
1753 	}
1754 
1755 	return B_OK;
1756 }
1757 
1758 
1759 status_t
1760 heap_init_post_thread()
1761 {
1762 	void *address = NULL;
1763 	area_id growHeapArea = create_area("dedicated grow heap", &address,
1764 		B_ANY_KERNEL_BLOCK_ADDRESS, HEAP_DEDICATED_GROW_SIZE, B_FULL_LOCK,
1765 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1766 	if (growHeapArea < B_OK) {
1767 		panic("heap_init_post_thread(): couldn't allocate dedicated grow heap area");
1768 		return growHeapArea;
1769 	}
1770 
1771 	sGrowHeap = heap_create_allocator("grow", (addr_t)address,
1772 		HEAP_DEDICATED_GROW_SIZE, &sHeapClasses[0]);
1773 	if (sGrowHeap == NULL) {
1774 		panic("heap_init_post_thread(): failed to create dedicated grow heap\n");
1775 		return B_ERROR;
1776 	}
1777 
1778 	sHeapGrowThread = spawn_kernel_thread(heap_grow_thread, "heap grower",
1779 		B_URGENT_PRIORITY, NULL);
1780 	if (sHeapGrowThread < 0) {
1781 		panic("heap_init_post_thread(): cannot create heap grow thread\n");
1782 		return sHeapGrowThread;
1783 	}
1784 
1785 	if (register_kernel_daemon(deferred_deleter, NULL, 50) != B_OK)
1786 		panic("heap_init_post_thread(): failed to init deferred deleter");
1787 
1788 	send_signal_etc(sHeapGrowThread, SIGCONT, B_DO_NOT_RESCHEDULE);
1789 	return B_OK;
1790 }
1791 
1792 
1793 //	#pragma mark - Public API
1794 
1795 
1796 void *
1797 memalign(size_t alignment, size_t size)
1798 {
1799 	if (!gKernelStartup && !are_interrupts_enabled()) {
1800 		panic("memalign(): called with interrupts disabled\n");
1801 		return NULL;
1802 	}
1803 
1804 	if (!gKernelStartup && size > HEAP_AREA_USE_THRESHOLD) {
1805 		// don't even attempt such a huge allocation - use areas instead
1806 		size_t areaSize = size + sizeof(area_allocation_info);
1807 		if (alignment != 0)
1808 			areaSize = ROUNDUP(areaSize, alignment);
1809 
1810 		void *address = NULL;
1811 		areaSize = ROUNDUP(areaSize, B_PAGE_SIZE);
1812 		area_id allocationArea = create_area("memalign area", &address,
1813 			B_ANY_KERNEL_BLOCK_ADDRESS, areaSize, B_FULL_LOCK,
1814 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1815 		if (allocationArea < B_OK) {
1816 			dprintf("heap: failed to create area for huge allocation\n");
1817 			return NULL;
1818 		}
1819 
1820 		area_allocation_info *info = (area_allocation_info *)address;
1821 		info->magic = kAreaAllocationMagic;
1822 		info->area = allocationArea;
1823 		info->base = address;
1824 		info->size = areaSize;
1825 		info->allocation_size = size;
1826 		info->allocation_alignment = alignment;
1827 
1828 		address = (void *)((addr_t)address + sizeof(area_allocation_info));
1829 		if (alignment != 0)
1830 			address = (void *)ROUNDUP((addr_t)address, alignment);
1831 
1832 		TRACE(("heap: allocated area %ld for huge allocation of %lu bytes\n",
1833 			allocationArea, size));
1834 
1835 		info->allocation_base = address;
1836 
1837 #if PARANOID_KERNEL_MALLOC
1838 		memset(address, 0xcc, size);
1839 #endif
1840 		return address;
1841 	}
1842 
1843 	uint32 heapClass = heap_class_for(size);
1844 	heap_allocator *heap = sHeaps[heapClass];
1845 	void *result = heap_memalign(heap, alignment, size);
1846 	if (result == NULL) {
1847 		// request an urgent grow and wait - we don't do it ourselves here, to
1848 		// serialize growing through the grow thread, as otherwise multiple
1849 		// threads hitting this situation (likely when memory runs out) would
1850 		// all add areas
1851 		sLastGrowRequest[heapClass]++;
1852 		switch_sem(sHeapGrowSem, sHeapGrownNotify);
1853 
1854 		// and then try again
1855 		result = heap_memalign(heap, alignment, size);
1856 	} else if (heap_should_grow(heap)) {
1857 		// should grow sometime soon, notify the grower
1858 		release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
1859 	}
1860 
1861 #if PARANOID_HEAP_VALIDATION
1862 	heap_validate_heap(heap);
1863 #endif
1864 
1865 	if (result == NULL)
1866 		panic("heap: kernel heap has run out of memory\n");
1867 	return result;
1868 }
1869 
1870 
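// Allocates without ever waiting for the grow thread, which makes it safe to
// use from the grow thread itself and from other paths that must not block
// on heap growth. When no heap can satisfy the request it returns NULL
// instead of waiting (the grow thread panics in that case, as it cannot
// recover).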
1871 void *
1872 malloc_nogrow(size_t size)
1873 {
1874 	// use dedicated memory in the grow thread by default
1875 	if (thread_get_current_thread_id() == sHeapGrowThread) {
1876 		void *result = heap_memalign(sGrowHeap, 0, size);
1877 		if (!sAddGrowHeap && heap_should_grow(sGrowHeap)) {
1878 			// hopefully the heap grower will manage to create a new heap
1879 			// before running out of private memory...
1880 			dprintf("heap: requesting new grow heap\n");
1881 			sAddGrowHeap = true;
1882 			release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
1883 		}
1884 
1885 		if (result != NULL)
1886 			return result;
1887 	}
1888 
1889 	// try public memory, there might be something available
1890 	heap_allocator *heap = sHeaps[heap_class_for(size)];
1891 	void *result = heap_memalign(heap, 0, size);
1892 	if (result != NULL)
1893 		return result;
1894 
1895 	// no memory available
1896 	if (thread_get_current_thread_id() == sHeapGrowThread)
1897 		panic("heap: all heaps have run out of memory while growing\n");
1898 	else
1899 		dprintf("heap: all heaps have run out of memory\n");
1900 
1901 	return NULL;
1902 }
1903 
1904 
1905 void *
1906 malloc(size_t size)
1907 {
1908 	return memalign(0, size);
1909 }
1910 
1911 
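// Frees an allocation by asking each size class heap, then the dedicated
// grow heap, and finally by checking whether the address belongs to an
// area-backed huge allocation, in which case the whole area is deleted.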
1912 void
1913 free(void *address)
1914 {
1915 	if (!gKernelStartup && !are_interrupts_enabled()) {
1916 		panic("free(): called with interrupts disabled\n");
1917 		return;
1918 	}
1919 
1920 	for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) {
1921 		heap_allocator *heap = sHeaps[i];
1922 		if (heap_free(heap, address) == B_OK) {
1923 #if PARANOID_HEAP_VALIDATION
1924 			heap_validate_heap(heap);
1925 #endif
1926 			return;
1927 		}
1928 	}
1929 
1930 	// maybe it was allocated from the dedicated grow heap
1931 	if (heap_free(sGrowHeap, address) == B_OK)
1932 		return;
1933 
1934 	// or maybe it was a huge allocation using an area
1935 	area_info areaInfo;
1936 	area_id area = area_for(address);
1937 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
1938 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
1939 
1940 		// just make extra sure it was allocated by us
1941 		if (info->magic == kAreaAllocationMagic && info->area == area
1942 			&& info->size == areaInfo.size && info->base == areaInfo.address
1943 			&& info->allocation_size < areaInfo.size) {
1944 			delete_area(area);
1945 			TRACE(("free(): freed huge allocation by deleting area %ld\n", area));
1946 			return;
1947 		}
1948 	}
1949 
1950 	panic("free(): free failed for address %p\n", address);
1951 }
1952 
1953 
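// Resizes an allocation following the same lookup order as free(). For
// area-backed huge allocations the existing area is reused if it is still
// large enough; otherwise a new block is allocated, the data copied over and
// the old area deleted.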
1954 void *
1955 realloc(void *address, size_t newSize)
1956 {
1957 	if (!gKernelStartup && !are_interrupts_enabled()) {
1958 		panic("realloc(): called with interrupts disabled\n");
1959 		return NULL;
1960 	}
1961 
1962 	if (address == NULL)
1963 		return memalign(0, newSize);
1964 
1965 	if (newSize == 0) {
1966 		free(address);
1967 		return NULL;
1968 	}
1969 
1970 	void *newAddress = NULL;
1971 	for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) {
1972 		heap_allocator *heap = sHeaps[i];
1973 		if (heap_realloc(heap, address, &newAddress, newSize) == B_OK) {
1974 #if PARANOID_HEAP_VALIDATION
1975 			heap_validate_heap(heap);
1976 #endif
1977 			return newAddress;
1978 		}
1979 	}
1980 
1981 	// maybe it was allocated from the dedicated grow heap
1982 	if (heap_realloc(sGrowHeap, address, &newAddress, newSize) == B_OK)
1983 		return newAddress;
1984 
1985 	// or maybe it was a huge allocation using an area
1986 	area_info areaInfo;
1987 	area_id area = area_for(address);
1988 	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
1989 		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
1990 
1991 		// just make extra sure it was allocated by us
1992 		if (info->magic == kAreaAllocationMagic && info->area == area
1993 			&& info->size == areaInfo.size && info->base == areaInfo.address
1994 			&& info->allocation_size < areaInfo.size) {
1995 			size_t available = info->size - ((addr_t)info->allocation_base
1996 				- (addr_t)info->base);
1997 
1998 			if (available >= newSize) {
1999 				// there is enough room available for the newSize
2000 				TRACE(("realloc(): new size %ld fits in old area %ld with %ld available\n",
2001 					newSize, area, available));
2002 				return address;
2003 			}
2004 
2005 			// have to allocate/copy/free - TODO maybe resize the area instead?
2006 			newAddress = memalign(0, newSize);
2007 			if (newAddress == NULL) {
2008 				dprintf("realloc(): failed to allocate new block of %ld bytes\n",
2009 					newSize);
2010 				return NULL;
2011 			}
2012 
2013 			memcpy(newAddress, address, min_c(newSize, info->allocation_size));
2014 			delete_area(area);
2015 			TRACE(("realloc(): allocated new block %p for size %ld and deleted old area %ld\n",
2016 				newAddress, newSize, area));
2017 			return newAddress;
2018 		}
2019 	}
2020 
2021 	panic("realloc(): failed to realloc address %p to size %lu\n", address, newSize);
2022 	return NULL;
2023 }
2024 
2025 
2026 void *
2027 calloc(size_t numElements, size_t size)
2028 {
	// make sure numElements * size does not overflow
	if (size != 0 && numElements > (size_t)-1 / size)
		return NULL;

2029 	void *address = memalign(0, numElements * size);
2030 	if (address != NULL)
2031 		memset(address, 0, numElements * size);
2032 
2033 	return address;
2034 }
2035 
2036 
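// Queues a block for freeing by the deferred deleter kernel daemon. The
// block's own memory is reused (via placement new) to hold the list entry,
// so it must be at least sizeof(DeferredFreeListEntry) bytes large. This is
// meant for contexts that cannot call free() directly, for example while
// holding a spinlock, where the heap mutex must not be acquired.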
2037 void
2038 deferred_free(void* block)
2039 {
2040 	if (block == NULL)
2041 		return;
2042 
2043 	// TODO: Use SinglyLinkedList, so that we only need sizeof(void*).
2044 	DeferredFreeListEntry* entry = new(block) DeferredFreeListEntry;
2045 
2046 	InterruptsSpinLocker _(sDeferredFreeListLock);
2047 	sDeferredFreeList.Add(entry);
2048 }
2049 
2050 
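// The malloc_referenced*() functions implement a minimal reference counted
// allocation: an int32 counter is stored directly in front of the pointer
// returned to the caller. malloc_referenced_acquire() adds a reference,
// malloc_referenced_release() drops one and frees the underlying block once
// the last reference is gone.
//
// Usage sketch (hypothetical caller):
//
//	void* data = malloc_referenced(sizeof(my_struct));
//	void* second = malloc_referenced_acquire(data);
//	malloc_referenced_release(second);	// one reference left
//	malloc_referenced_release(data);	// last reference, block is freed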
2051 void*
2052 malloc_referenced(size_t size)
2053 {
2054 	int32* referencedData = (int32*)malloc(size + sizeof(int32));
2055 	if (referencedData == NULL)
2056 		return NULL;
2057 
2058 	*referencedData = 1;
2059 
2060 	return referencedData + 1;
2061 }
2062 
2063 
2064 void*
2065 malloc_referenced_acquire(void* data)
2066 {
2067 	if (data != NULL) {
2068 		int32* referencedData = (int32*)data - 1;
2069 		atomic_add(referencedData, 1);
2070 	}
2071 
2072 	return data;
2073 }
2074 
2075 
2076 void
2077 malloc_referenced_release(void* data)
2078 {
2079 	if (data == NULL)
2080 		return;
2081 
2082 	int32* referencedData = (int32*)data - 1;
	// atomic_add() returns the previous value; free once the last reference
	// has been dropped
2083 	if (atomic_add(referencedData, -1) == 1)
2084 		free(referencedData);
2085 }
2086