xref: /haiku/src/system/boot/platform/bios_ia32/mmu.cpp (revision 9e25244c5e9051f6cd333820d6332397361abd6c)
1 /*
2  * Copyright 2004-2008, Axel Dörfler, axeld@pinc-software.de.
3  * Based on code written by Travis Geiselbrecht for NewOS.
4  *
5  * Distributed under the terms of the MIT License.
6  */
7 
8 
9 #include "mmu.h"
10 
11 #include <string.h>
12 
13 #include <OS.h>
14 
15 #include <arch/cpu.h>
16 #include <arch/x86/descriptors.h>
17 #include <arch_kernel.h>
18 #include <boot/platform.h>
19 #include <boot/stdio.h>
20 #include <boot/kernel_args.h>
21 #include <boot/stage2.h>
22 #include <kernel.h>
23 
24 #include "bios.h"
25 #include "interrupts.h"
26 
27 
28 /*!	The (physical) memory layout of the boot loader is currently as follows:
29 	  0x0500 - 0x10000	protected mode stack
30 	  0x0500 - 0x09000	real mode stack
31 	 0x10000 - ?		code (up to ~500 kB)
32 	 0x90000			1st temporary page table (identity maps 0-4 MB)
33 	 0x91000			2nd (4-8 MB)
	 0x92000 - 0x9e000	further page tables
35 	 0x9e000 - 0xa0000	SMP trampoline code
36 	[0xa0000 - 0x100000	BIOS/ROM/reserved area]
37 	0x100000			page directory
38 	     ...			boot loader heap (32 kB)
39 	     ...			free physical memory
40 
	The first 8 MB are identity mapped (0x0 - 0x0800000); paging is turned
	on. The kernel is mapped at 0x80000000, all other stuff mapped by the
	loader (kernel args, modules, driver settings, ...) comes after
	0x81000000 which means that there are currently 16 MB reserved for
	the kernel itself (see kMaxKernelSize).
46 
47 	The layout in PXE mode differs a bit from this, see definitions below.
48 */
49 
// Uncomment TRACE_MMU to forward TRACE() calls to dprintf(). When it is not
// defined, TRACE() expands to an empty statement and its arguments are
// discarded by the preprocessor.
//#define TRACE_MMU
#ifdef TRACE_MMU
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif
56 
57 
58 //#define TRACE_MEMORY_MAP
	// Define this to print the memory map to the serial debug output.
	// You also need to define ENABLE_SERIAL in serial.cpp
	// for the output to work.
62 
63 
// memory structure returned by int 0x15, ax 0xe820
// One entry describes one range of the BIOS memory map; get_memory_map()
// collects an array of these and mmu_init() evaluates them.
struct extended_memory {
	uint64 base_addr;	// start address of the range
	uint64 length;		// length of the range (base_addr + length is its end)
	uint32 type;		// range type; mmu_init() treats 1 as usable memory
};
70 
71 
72 segment_descriptor gBootGDT[BOOT_GDT_SEGMENT_COUNT];
73 
74 static const uint32 kDefaultPageTableFlags = 0x07;	// present, user, R/W
75 static const size_t kMaxKernelSize = 0x1000000;		// 16 MB for the kernel
76 
77 // working page directory and page table
78 static uint32 *sPageDirectory = 0;
79 
80 #ifdef _PXE_ENV
81 
82 static addr_t sNextPhysicalAddress = 0x112000;
83 static addr_t sNextVirtualAddress = KERNEL_LOAD_BASE_32_BIT + kMaxKernelSize;
84 
85 static addr_t sNextPageTableAddress = 0x7d000;
86 static const uint32 kPageTableRegionEnd = 0x8b000;
87 	// we need to reserve 2 pages for the SMP trampoline code
88 
89 #else
90 
91 static addr_t sNextPhysicalAddress = 0x100000;
92 static addr_t sNextVirtualAddress = KERNEL_LOAD_BASE_32_BIT + kMaxKernelSize;
93 
94 static addr_t sNextPageTableAddress = 0x90000;
95 static const uint32 kPageTableRegionEnd = 0x9e000;
96 	// we need to reserve 2 pages for the SMP trampoline code
97 
98 #endif
99 
100 
101 static addr_t
102 get_next_virtual_address(size_t size)
103 {
104 	addr_t address = sNextVirtualAddress;
105 	sNextVirtualAddress += size;
106 
107 	return address;
108 }
109 
110 
111 static addr_t
112 get_next_physical_address(size_t size)
113 {
114 	uint64 base;
115 	if (!get_free_address_range(gKernelArgs.physical_allocated_range,
116 			gKernelArgs.num_physical_allocated_ranges, sNextPhysicalAddress,
117 			size, &base)) {
118 		panic("Out of physical memory!");
119 		return 0;
120 	}
121 
122 	insert_physical_allocated_range(base, size);
123 	sNextPhysicalAddress = base + size;
124 		// TODO: Can overflow theoretically.
125 
126 	return base;
127 }
128 
129 
130 static addr_t
131 get_next_virtual_page()
132 {
133 	return get_next_virtual_address(B_PAGE_SIZE);
134 }
135 
136 
137 static addr_t
138 get_next_physical_page()
139 {
140 	return get_next_physical_address(B_PAGE_SIZE);
141 }
142 
143 
144 static uint32 *
145 get_next_page_table()
146 {
147 	TRACE("get_next_page_table, sNextPageTableAddress %#" B_PRIxADDR
148 		", kPageTableRegionEnd %#" B_PRIxADDR "\n", sNextPageTableAddress,
149 		kPageTableRegionEnd);
150 
151 	addr_t address = sNextPageTableAddress;
152 	if (address >= kPageTableRegionEnd)
153 		return (uint32 *)get_next_physical_page();
154 
155 	sNextPageTableAddress += B_PAGE_SIZE;
156 	return (uint32 *)address;
157 }
158 
159 
160 /*!	Adds a new page table for the specified base address */
161 static uint32*
162 add_page_table(addr_t base)
163 {
164 	if (gKernelArgs.arch_args.num_pgtables == MAX_BOOT_PTABLES) {
165 		panic("gKernelArgs.arch_args.pgtables overflow");
166 		return NULL;
167 	}
168 
169 	base = ROUNDDOWN(base, B_PAGE_SIZE * 1024);
170 
171 	// Get new page table and clear it out
172 	uint32 *pageTable = get_next_page_table();
173 	if (pageTable > (uint32 *)(8 * 1024 * 1024)) {
174 		panic("tried to add page table beyond the identity mapped 8 MB "
175 			"region\n");
176 		return NULL;
177 	}
178 
179 	TRACE("add_page_table(base = %p), got page: %p\n", (void*)base, pageTable);
180 
181 	gKernelArgs.arch_args.pgtables[gKernelArgs.arch_args.num_pgtables++]
182 		= (uint32)pageTable;
183 
184 	for (int32 i = 0; i < 1024; i++)
185 		pageTable[i] = 0;
186 
187 	// put the new page table into the page directory
188 	sPageDirectory[base / (4 * 1024 * 1024)]
189 		= (uint32)pageTable | kDefaultPageTableFlags;
190 
191 	// update the virtual end address in the kernel args
192 	base += B_PAGE_SIZE * 1024;
193 	if (base > gKernelArgs.arch_args.virtual_end)
194 		gKernelArgs.arch_args.virtual_end = base;
195 
196 	return pageTable;
197 }
198 
199 
200 static void
201 unmap_page(addr_t virtualAddress)
202 {
203 	TRACE("unmap_page(virtualAddress = %p)\n", (void *)virtualAddress);
204 
205 	if (virtualAddress < KERNEL_LOAD_BASE_32_BIT) {
206 		panic("unmap_page: asked to unmap invalid page %p!\n",
207 			(void *)virtualAddress);
208 	}
209 
210 	// unmap the page from the correct page table
211 	uint32 *pageTable = (uint32 *)(sPageDirectory[virtualAddress
212 		/ (B_PAGE_SIZE * 1024)] & 0xfffff000);
213 	pageTable[(virtualAddress % (B_PAGE_SIZE * 1024)) / B_PAGE_SIZE] = 0;
214 
215 	asm volatile("invlpg (%0)" : : "r" (virtualAddress));
216 }
217 
218 
219 /*!	Creates an entry to map the specified virtualAddress to the given
220 	physicalAddress.
221 	If the mapping goes beyond the current page table, it will allocate
222 	a new one. If it cannot map the requested page, it panics.
223 */
224 static void
225 map_page(addr_t virtualAddress, addr_t physicalAddress, uint32 flags)
226 {
227 	TRACE("map_page: vaddr 0x%lx, paddr 0x%lx\n", virtualAddress,
228 		physicalAddress);
229 
230 	if (virtualAddress < KERNEL_LOAD_BASE_32_BIT) {
231 		panic("map_page: asked to map invalid page %p!\n",
232 			(void *)virtualAddress);
233 	}
234 
235 	uint32 *pageTable = (uint32 *)(sPageDirectory[virtualAddress
236 		/ (B_PAGE_SIZE * 1024)] & 0xfffff000);
237 
238 	if (pageTable == NULL) {
239 		// we need to add a new page table
240 		pageTable = add_page_table(virtualAddress);
241 
242 		if (pageTable == NULL) {
243 			panic("map_page: failed to allocate a page table for virtual "
244 				"address %p\n", (void*)virtualAddress);
245 			return;
246 		}
247 	}
248 
249 	physicalAddress &= ~(B_PAGE_SIZE - 1);
250 
251 	// map the page to the correct page table
252 	uint32 tableEntry = (virtualAddress % (B_PAGE_SIZE * 1024)) / B_PAGE_SIZE;
253 
254 	TRACE("map_page: inserting pageTable %p, tableEntry %" B_PRIu32
255 		", physicalAddress %#" B_PRIxADDR "\n", pageTable, tableEntry,
256 		physicalAddress);
257 
258 	pageTable[tableEntry] = physicalAddress | flags;
259 
260 	asm volatile("invlpg (%0)" : : "r" (virtualAddress));
261 
262 	TRACE("map_page: done\n");
263 }
264 
265 
#ifdef TRACE_MEMORY_MAP
/*!	Returns a printable name for the range \a type of an int 0x15,
	ax 0xe820 memory map entry. Only used for the memory map trace output.
*/
static const char *
e820_memory_type(uint32 type)
{
	// names of the types 1 to 4, in that order
	static const char *const kTypeNames[] = {
		"memory",
		"reserved",
		"ACPI reclaim",
		"ACPI NVS"
	};

	if (type >= 1 && type <= 4)
		return kTypeNames[type - 1];

	return "unknown/reserved";
}
#endif
279 
280 
281 static uint32
282 get_memory_map(extended_memory **_extendedMemory)
283 {
284 	extended_memory *block = (extended_memory *)kExtraSegmentScratch;
285 	bios_regs regs = {0, 0, sizeof(extended_memory), 0, 0, (uint32)block, 0, 0};
286 	uint32 count = 0;
287 
288 	TRACE("get_memory_map()\n");
289 
290 	do {
291 		regs.eax = 0xe820;
292 		regs.edx = 'SMAP';
293 
294 		call_bios(0x15, &regs);
295 		if ((regs.flags & CARRY_FLAG) != 0)
296 			return 0;
297 
298 		regs.edi += sizeof(extended_memory);
299 		count++;
300 	} while (regs.ebx != 0);
301 
302 	*_extendedMemory = block;
303 
304 #ifdef TRACE_MEMORY_MAP
305 	dprintf("extended memory info (from 0xe820):\n");
306 	for (uint32 i = 0; i < count; i++) {
307 		dprintf("    base 0x%08Lx, len 0x%08Lx, type %lu (%s)\n",
308 			block[i].base_addr, block[i].length,
309 			block[i].type, e820_memory_type(block[i].type));
310 	}
311 #endif
312 
313 	return count;
314 }
315 
316 
317 static void
318 init_page_directory(void)
319 {
320 	TRACE("init_page_directory\n");
321 
322 	// allocate a new pgdir
323 	sPageDirectory = (uint32 *)get_next_physical_page();
324 	gKernelArgs.arch_args.phys_pgdir = (uint32)sPageDirectory;
325 
326 	// clear out the pgdir
327 	for (int32 i = 0; i < 1024; i++) {
328 		sPageDirectory[i] = 0;
329 	}
330 
331 	// Identity map the first 8 MB of memory so that their
332 	// physical and virtual address are the same.
333 	// These page tables won't be taken over into the kernel.
334 
335 	// make the first page table at the first free spot
336 	uint32 *pageTable = get_next_page_table();
337 
338 	for (int32 i = 0; i < 1024; i++) {
339 		pageTable[i] = (i * 0x1000) | kDefaultPageFlags;
340 	}
341 
342 	sPageDirectory[0] = (uint32)pageTable | kDefaultPageFlags;
343 
344 	// make the second page table
345 	pageTable = get_next_page_table();
346 
347 	for (int32 i = 0; i < 1024; i++) {
348 		pageTable[i] = (i * 0x1000 + 0x400000) | kDefaultPageFlags;
349 	}
350 
351 	sPageDirectory[1] = (uint32)pageTable | kDefaultPageFlags;
352 
353 	gKernelArgs.arch_args.num_pgtables = 0;
354 
355 	// switch to the new pgdir and enable paging
356 	asm("movl %0, %%eax;"
357 		"movl %%eax, %%cr3;" : : "m" (sPageDirectory) : "eax");
358 	// Important.  Make sure supervisor threads can fault on read only pages...
359 	asm("movl %%eax, %%cr0" : : "a" ((1 << 31) | (1 << 16) | (1 << 5) | 1));
360 }
361 
362 
363 //	#pragma mark -
364 
365 
366 /*!
367 	Neither \a virtualAddress nor \a size need to be aligned, but the function
368 	will map all pages the range intersects with.
369 	If physicalAddress is not page-aligned, the returned virtual address will
370 	have the same "misalignment".
371 */
372 extern "C" addr_t
373 mmu_map_physical_memory(addr_t physicalAddress, size_t size, uint32 flags)
374 {
375 	addr_t address = sNextVirtualAddress;
376 	addr_t pageOffset = physicalAddress & (B_PAGE_SIZE - 1);
377 
378 	physicalAddress -= pageOffset;
379 	size += pageOffset;
380 
381 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
382 		map_page(get_next_virtual_page(), physicalAddress + offset, flags);
383 	}
384 
385 	return address + pageOffset;
386 }
387 
388 
389 extern "C" void *
390 mmu_allocate(void *virtualAddress, size_t size)
391 {
392 	TRACE("mmu_allocate: requested vaddr: %p, next free vaddr: 0x%lx, size: "
393 		"%ld\n", virtualAddress, sNextVirtualAddress, size);
394 
395 	size = (size + B_PAGE_SIZE - 1) / B_PAGE_SIZE;
396 		// get number of pages to map
397 
398 	if (virtualAddress != NULL) {
399 		// This special path is almost only useful for loading the
400 		// kernel into memory; it will only allow you to map the
401 		// 'kMaxKernelSize' bytes following the kernel base address.
402 		// Also, it won't check for already mapped addresses, so
403 		// you better know why you are here :)
404 		addr_t address = (addr_t)virtualAddress;
405 
406 		// is the address within the valid range?
407 		if (address < KERNEL_LOAD_BASE_32_BIT || address + size * B_PAGE_SIZE
408 			>= KERNEL_LOAD_BASE_32_BIT + kMaxKernelSize)
409 			return NULL;
410 
411 		for (uint32 i = 0; i < size; i++) {
412 			map_page(address, get_next_physical_page(), kDefaultPageFlags);
413 			address += B_PAGE_SIZE;
414 		}
415 
416 		return virtualAddress;
417 	}
418 
419 	void *address = (void *)sNextVirtualAddress;
420 
421 	for (uint32 i = 0; i < size; i++) {
422 		map_page(get_next_virtual_page(), get_next_physical_page(),
423 			kDefaultPageFlags);
424 	}
425 
426 	return address;
427 }
428 
429 
430 /*!	Allocates a single page and returns both its virtual and physical
431 	addresses.
432 */
433 void *
434 mmu_allocate_page(addr_t *_physicalAddress)
435 {
436 	addr_t virt = get_next_virtual_page();
437 	addr_t phys = get_next_physical_page();
438 
439 	map_page(virt, phys, kDefaultPageFlags);
440 
441 	if (_physicalAddress)
442 		*_physicalAddress = phys;
443 
444 	return (void *)virt;
445 }
446 
447 
448 /*!	Allocates the given physical range.
449 	\return \c true, if the range could be allocated, \c false otherwise.
450 */
451 bool
452 mmu_allocate_physical(addr_t base, size_t size)
453 {
454 	// check whether the physical memory range exists at all
455 	if (!is_address_range_covered(gKernelArgs.physical_memory_range,
456 			gKernelArgs.num_physical_memory_ranges, base, size)) {
457 		return false;
458 	}
459 
460 	// check whether the physical range is still free
461 	uint64 foundBase;
462 	if (!get_free_address_range(gKernelArgs.physical_allocated_range,
463 			gKernelArgs.num_physical_allocated_ranges, base, size, &foundBase)
464 		|| foundBase != base) {
465 		return false;
466 	}
467 
468 	return insert_physical_allocated_range(base, size) == B_OK;
469 }
470 
471 
472 /*!	This will unmap the allocated chunk of memory from the virtual
473 	address space. It might not actually free memory (as its implementation
474 	is very simple), but it might.
475 	Neither \a virtualAddress nor \a size need to be aligned, but the function
476 	will unmap all pages the range intersects with.
477 */
478 extern "C" void
479 mmu_free(void *virtualAddress, size_t size)
480 {
481 	TRACE("mmu_free(virtualAddress = %p, size: %ld)\n", virtualAddress, size);
482 
483 	addr_t address = (addr_t)virtualAddress;
484 	addr_t pageOffset = address % B_PAGE_SIZE;
485 	address -= pageOffset;
486 	size = (size + pageOffset + B_PAGE_SIZE - 1) / B_PAGE_SIZE * B_PAGE_SIZE;
487 
488 	// is the address within the valid range?
489 	if (address < KERNEL_LOAD_BASE_32_BIT || address + size > sNextVirtualAddress) {
490 		panic("mmu_free: asked to unmap out of range region (%p, size %lx)\n",
491 			(void *)address, size);
492 	}
493 
494 	// unmap all pages within the range
495 	for (size_t i = 0; i < size; i += B_PAGE_SIZE) {
496 		unmap_page(address);
497 		address += B_PAGE_SIZE;
498 	}
499 
500 	if (address == sNextVirtualAddress) {
501 		// we can actually reuse the virtual address space
502 		sNextVirtualAddress -= size;
503 	}
504 }
505 
506 
507 size_t
508 mmu_get_virtual_usage()
509 {
510 	return sNextVirtualAddress - KERNEL_LOAD_BASE_32_BIT;
511 }
512 
513 
514 bool
515 mmu_get_virtual_mapping(addr_t virtualAddress, addr_t *_physicalAddress)
516 {
517 	if (virtualAddress < KERNEL_LOAD_BASE_32_BIT) {
518 		panic("mmu_get_virtual_mapping: asked to lookup invalid page %p!\n",
519 			(void *)virtualAddress);
520 	}
521 
522 	uint32 dirEntry = sPageDirectory[virtualAddress / (B_PAGE_SIZE * 1024)];
523 	if ((dirEntry & (1 << 0)) == 0)
524 		return false;
525 
526 	uint32 *pageTable = (uint32 *)(dirEntry & 0xfffff000);
527 	uint32 tableEntry = pageTable[(virtualAddress % (B_PAGE_SIZE * 1024))
528 		/ B_PAGE_SIZE];
529 	if ((tableEntry & (1 << 0)) == 0)
530 		return false;
531 
532 	*_physicalAddress = tableEntry & 0xfffff000;
533 	return true;
534 }
535 
536 
537 /*!	Sets up the final and kernel accessible GDT and IDT tables.
538 	BIOS calls won't work any longer after this function has
539 	been called.
540 */
541 extern "C" void
542 mmu_init_for_kernel(void)
543 {
544 	TRACE("mmu_init_for_kernel\n");
545 
546 	STATIC_ASSERT(BOOT_GDT_SEGMENT_COUNT > KERNEL_CODE_SEGMENT
547 		&& BOOT_GDT_SEGMENT_COUNT > KERNEL_DATA_SEGMENT
548 		&& BOOT_GDT_SEGMENT_COUNT > USER_CODE_SEGMENT
549 		&& BOOT_GDT_SEGMENT_COUNT > USER_DATA_SEGMENT);
550 
551 	// set up a new gdt
552 
553 	// put standard segment descriptors in GDT
554 	clear_segment_descriptor(&gBootGDT[0]);
555 
556 	// seg 0x08 - kernel 4GB code
557 	set_segment_descriptor(&gBootGDT[KERNEL_CODE_SEGMENT], 0, 0xffffffff,
558 		DT_CODE_READABLE, DPL_KERNEL);
559 
560 	// seg 0x10 - kernel 4GB data
561 	set_segment_descriptor(&gBootGDT[KERNEL_DATA_SEGMENT], 0, 0xffffffff,
562 		DT_DATA_WRITEABLE, DPL_KERNEL);
563 
564 	// seg 0x1b - ring 3 user 4GB code
565 	set_segment_descriptor(&gBootGDT[USER_CODE_SEGMENT], 0, 0xffffffff,
566 		DT_CODE_READABLE, DPL_USER);
567 
568 	// seg 0x23 - ring 3 user 4GB data
569 	set_segment_descriptor(&gBootGDT[USER_DATA_SEGMENT], 0, 0xffffffff,
570 		DT_DATA_WRITEABLE, DPL_USER);
571 
572 	// load the GDT
573 	struct gdt_idt_descr gdtDescriptor;
574 	gdtDescriptor.limit = sizeof(gBootGDT);
575 	gdtDescriptor.base = gBootGDT;
576 
577 	asm("lgdt %0" : : "m" (gdtDescriptor));
578 
579 	TRACE("gdt at virtual address %p\n", gBootGDT);
580 
581 	// Save the memory we've virtually allocated (for the kernel and other
582 	// stuff)
583 	gKernelArgs.virtual_allocated_range[0].start = KERNEL_LOAD_BASE_32_BIT;
584 	gKernelArgs.virtual_allocated_range[0].size
585 		= sNextVirtualAddress - KERNEL_LOAD_BASE_32_BIT;
586 	gKernelArgs.num_virtual_allocated_ranges = 1;
587 
588 	// sort the address ranges
589 	sort_address_ranges(gKernelArgs.physical_memory_range,
590 		gKernelArgs.num_physical_memory_ranges);
591 	sort_address_ranges(gKernelArgs.physical_allocated_range,
592 		gKernelArgs.num_physical_allocated_ranges);
593 	sort_address_ranges(gKernelArgs.virtual_allocated_range,
594 		gKernelArgs.num_virtual_allocated_ranges);
595 
596 #ifdef TRACE_MEMORY_MAP
597 	{
598 		uint32 i;
599 
600 		dprintf("phys memory ranges:\n");
601 		for (i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
602 			dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
603 				gKernelArgs.physical_memory_range[i].start,
604 				gKernelArgs.physical_memory_range[i].size);
605 		}
606 
607 		dprintf("allocated phys memory ranges:\n");
608 		for (i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
609 			dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
610 				gKernelArgs.physical_allocated_range[i].start,
611 				gKernelArgs.physical_allocated_range[i].size);
612 		}
613 
614 		dprintf("allocated virt memory ranges:\n");
615 		for (i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
616 			dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
617 				gKernelArgs.virtual_allocated_range[i].start,
618 				gKernelArgs.virtual_allocated_range[i].size);
619 		}
620 	}
621 #endif
622 }
623 
624 
625 extern "C" void
626 mmu_init(void)
627 {
628 	TRACE("mmu_init\n");
629 
630 	gKernelArgs.arch_args.virtual_end = KERNEL_LOAD_BASE_32_BIT;
631 
632 	gKernelArgs.physical_allocated_range[0].start = sNextPhysicalAddress;
633 	gKernelArgs.physical_allocated_range[0].size = 0;
634 	gKernelArgs.num_physical_allocated_ranges = 1;
635 		// remember the start of the allocated physical pages
636 
637 	init_page_directory();
638 
639 	// Map the page directory into kernel space at 0xffc00000-0xffffffff
640 	// this enables a mmu trick where the 4 MB region that this pgdir entry
641 	// represents now maps the 4MB of potential pagetables that the pgdir
642 	// points to. Thrown away later in VM bringup, but useful for now.
643 	sPageDirectory[1023] = (uint32)sPageDirectory | kDefaultPageFlags;
644 
645 	// also map it on the next vpage
646 	gKernelArgs.arch_args.vir_pgdir = get_next_virtual_page();
647 	map_page(gKernelArgs.arch_args.vir_pgdir, (uint32)sPageDirectory,
648 		kDefaultPageFlags);
649 
650 	// map in a kernel stack
651 	gKernelArgs.cpu_kstack[0].start = (addr_t)mmu_allocate(NULL,
652 		KERNEL_STACK_SIZE + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE);
653 	gKernelArgs.cpu_kstack[0].size = KERNEL_STACK_SIZE
654 		+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE;
655 
656 	TRACE("kernel stack at 0x%" B_PRIx64 " to 0x%" B_PRIx64 "\n",
657 		gKernelArgs.cpu_kstack[0].start, gKernelArgs.cpu_kstack[0].start
658 		+ gKernelArgs.cpu_kstack[0].size);
659 
660 	extended_memory *extMemoryBlock;
661 	uint32 extMemoryCount = get_memory_map(&extMemoryBlock);
662 
663 	// figure out the memory map
664 	if (extMemoryCount > 0) {
665 		gKernelArgs.num_physical_memory_ranges = 0;
666 
667 		// first scan: add all usable ranges
668 		for (uint32 i = 0; i < extMemoryCount; i++) {
669 			// Type 1 is available memory
670 			if (extMemoryBlock[i].type != 1)
671 				continue;
672 
673 			uint64 base = extMemoryBlock[i].base_addr;
674 			uint64 length = extMemoryBlock[i].length;
675 			uint64 end = base + length;
676 
677 			// round everything up to page boundaries, exclusive of pages
678 			// it partially occupies
679 			base = ROUNDUP(base, B_PAGE_SIZE);
680 			end = ROUNDDOWN(end, B_PAGE_SIZE);
681 
682 			// We ignore all memory beyond 4 GB, if phys_addr_t is only
683 			// 32 bit wide.
684 			#if B_HAIKU_PHYSICAL_BITS == 32
685 				if (end > 0x100000000ULL)
686 					end = 0x100000000ULL;
687 			#endif
688 
689 			// Also ignore memory below 1 MB. Apparently some BIOSes fail to
690 			// provide the correct range type for some ranges (cf. #1925).
691 			// Later in the kernel we will reserve the range 0x0 - 0xa0000
692 			// and apparently 0xa0000 - 0x100000 never contain usable
693 			// memory, so we don't lose anything by doing that.
694 			if (base < 0x100000)
695 				base = 0x100000;
696 
697 			gKernelArgs.ignored_physical_memory
698 				+= length - (max_c(end, base) - base);
699 
700 			if (end <= base)
701 				continue;
702 
703 			status_t status = insert_physical_memory_range(base, end - base);
704 			if (status == B_ENTRY_NOT_FOUND) {
705 				panic("mmu_init(): Failed to add physical memory range "
706 					"%#" B_PRIx64 " - %#" B_PRIx64 " : all %d entries are "
707 					"used already!\n", base, end, MAX_PHYSICAL_MEMORY_RANGE);
708 			} else if (status != B_OK) {
709 				panic("mmu_init(): Failed to add physical memory range "
710 					"%#" B_PRIx64 " - %#" B_PRIx64 "\n", base, end);
711 			}
712 		}
713 
714 		uint64 initialPhysicalMemory = total_physical_memory();
715 
716 		// second scan: remove everything reserved that may overlap
717 		for (uint32 i = 0; i < extMemoryCount; i++) {
718 			if (extMemoryBlock[i].type == 1)
719 				continue;
720 
721 			uint64 base = extMemoryBlock[i].base_addr;
722 			uint64 end = ROUNDUP(base + extMemoryBlock[i].length, B_PAGE_SIZE);
723 			base = ROUNDDOWN(base, B_PAGE_SIZE);
724 
725 			status_t status = remove_physical_memory_range(base, end - base);
726 			if (status != B_OK) {
727 				panic("mmu_init(): Failed to remove physical memory range "
728 					"%#" B_PRIx64 " - %#" B_PRIx64 "\n", base, end);
729 			}
730 		}
731 
732 		// sort the ranges
733 		sort_address_ranges(gKernelArgs.physical_memory_range,
734 			gKernelArgs.num_physical_memory_ranges);
735 
736 		// On some machines we get several ranges that contain only a few pages
737 		// (or even only one) each, which causes us to run out of MTRRs later.
738 		// So we remove all ranges smaller than 64 KB, hoping that this will
739 		// leave us only with a few larger contiguous ranges (ideally one).
740 		for (int32 i = gKernelArgs.num_physical_memory_ranges - 1; i >= 0;
741 				i--) {
742 			uint64 size = gKernelArgs.physical_memory_range[i].size;
743 			if (size < 64 * 1024) {
744 				uint64 start = gKernelArgs.physical_memory_range[i].start;
745 				remove_physical_memory_range(start, size);
746 			}
747 		}
748 
749 		gKernelArgs.ignored_physical_memory
750 			+= initialPhysicalMemory - total_physical_memory();
751 	} else {
752 		bios_regs regs;
753 
754 		// We dont have an extended map, assume memory is contiguously mapped
755 		// at 0x0, but leave out the BIOS range ((640k - 1 page) to 1 MB).
756 		gKernelArgs.physical_memory_range[0].start = 0;
757 		gKernelArgs.physical_memory_range[0].size = 0x9f000;
758 		gKernelArgs.physical_memory_range[1].start = 0x100000;
759 
760 		regs.eax = 0xe801; // AX
761 		call_bios(0x15, &regs);
762 		if ((regs.flags & CARRY_FLAG) != 0) {
763 			regs.eax = 0x8800; // AH 88h
764 			call_bios(0x15, &regs);
765 			if ((regs.flags & CARRY_FLAG) != 0) {
766 				// TODO: for now!
767 				dprintf("No memory size - using 64 MB (fix me!)\n");
768 				uint32 memSize = 64 * 1024 * 1024;
769 				gKernelArgs.physical_memory_range[1].size = memSize - 0x100000;
770 			} else {
771 				dprintf("Get Extended Memory Size succeeded.\n");
772 				gKernelArgs.physical_memory_range[1].size = regs.eax * 1024;
773 			}
774 			gKernelArgs.num_physical_memory_ranges = 2;
775 		} else {
776 			dprintf("Get Memory Size for Large Configurations succeeded.\n");
777 			gKernelArgs.physical_memory_range[1].size = regs.ecx * 1024;
778 			gKernelArgs.physical_memory_range[2].start = 0x1000000;
779 			gKernelArgs.physical_memory_range[2].size = regs.edx * 64 * 1024;
780 			gKernelArgs.num_physical_memory_ranges = 3;
781 		}
782 	}
783 
784 	gKernelArgs.arch_args.page_hole = 0xffc00000;
785 }
786 
787 
788 //	#pragma mark -
789 
790 
791 extern "C" status_t
792 platform_allocate_region(void **_address, size_t size, uint8 protection,
793 	bool /*exactAddress*/)
794 {
795 	void *address = mmu_allocate(*_address, size);
796 	if (address == NULL)
797 		return B_NO_MEMORY;
798 
799 	*_address = address;
800 	return B_OK;
801 }
802 
803 
804 extern "C" status_t
805 platform_free_region(void *address, size_t size)
806 {
807 	mmu_free(address, size);
808 	return B_OK;
809 }
810 
811 
/*!	Releases the boot loader heap, which requires no work on this
	platform.
*/
void
platform_release_heap(struct stage2_args *args, void *base)
{
	// Nothing to do here: the heap will be freed automatically, since it
	// is in the identity mapped region, and not stored in the kernel's
	// page tables.
	(void)args;
	(void)base;
}
819 
820 
821 status_t
822 platform_init_heap(struct stage2_args *args, void **_base, void **_top)
823 {
824 	void *heap = (void *)get_next_physical_address(args->heap_size);
825 	if (heap == NULL)
826 		return B_NO_MEMORY;
827 
828 	*_base = heap;
829 	*_top = (void *)((int8 *)heap + args->heap_size);
830 	return B_OK;
831 }
832 
833 
834 extern "C" status_t
835 platform_bootloader_address_to_kernel_address(void *address, addr_t *_result)
836 {
837 	TRACE("%s: called\n", __func__);
838 	// bios_ia32 really doesn't need an address converstion
839 	*_result = (addr_t)address;
840 	return B_OK;
841 }
842 
843 
844 extern "C" status_t
845 platform_kernel_address_to_bootloader_address(addr_t address, void **_result)
846 {
847 	TRACE("%s: called\n", __func__);
848 	// bios_ia32 really doesn't need an address converstion
849 	*_result = (void*)address;
850 	return B_OK;
851 }
852