xref: /haiku/src/system/boot/platform/efi/mmu.cpp (revision 3aba70f980ccc3992e68f297814188fa5bda9904)
/*
 * Copyright 2016 Haiku, Inc. All rights reserved.
 * Copyright 2014, Jessica Hamilton, jessica.l.hamilton@gmail.com.
 * Copyright 2014, Henry Harrington, henry.harrington@gmail.com.
 * Distributed under the terms of the MIT License.
 */


#include <algorithm>

#include <boot/addr_range.h>
#include <boot/platform.h>
#include <boot/stage2.h>
#include <kernel/arch/x86/arch_kernel.h>
#include <kernel/kernel.h>

#include "efi_platform.h"
#include "mmu.h"

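// Tracks a single allocation made by the bootloader: its physical address,
// its (lazily assigned) kernel virtual address, and whether it has been
// released again.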
struct allocated_memory_region {
	allocated_memory_region *next;
	uint64_t vaddr;
	uint64_t paddr;
	size_t size;
	bool released;
};


static uint64_t next_virtual_address = KERNEL_LOAD_BASE_64_BIT + 32 * 1024 * 1024;
static allocated_memory_region *allocated_memory_regions = NULL;

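// Allocate a single physical page from EFI boot services; panics on failure.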
static uint64_t mmu_allocate_page()
{
	efi_physical_addr addr;
	efi_status s = kBootServices->AllocatePages(AllocateAnyPages, EfiLoaderData, 1, &addr);
	if (s != EFI_SUCCESS)
		panic("Unable to allocate memory: %li", s);

	return addr;
}

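// Build the 64-bit page tables that the kernel will be entered with and
// return the physical address of the PML4.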
uint64_t
mmu_generate_post_efi_page_tables(size_t memory_map_size,
	efi_memory_descriptor *memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	// Generate page tables, matching bios_ia32/long.cpp.
	uint64_t *pml4;
	uint64_t *pdpt;
	uint64_t *pageDir;
	uint64_t *pageTable;

	// Allocate the top level PML4.
	pml4 = NULL;
	if (platform_allocate_region((void**)&pml4, B_PAGE_SIZE, 0, false) != B_OK)
		panic("Failed to allocate PML4.");
	gKernelArgs.arch_args.phys_pgdir = (uint32_t)(addr_t)pml4;
	memset(pml4, 0, B_PAGE_SIZE);
	platform_bootloader_address_to_kernel_address(pml4, &gKernelArgs.arch_args.vir_pgdir);

	// Store the virtual memory usage information.
	gKernelArgs.virtual_allocated_range[0].start = KERNEL_LOAD_BASE_64_BIT;
	gKernelArgs.virtual_allocated_range[0].size = next_virtual_address - KERNEL_LOAD_BASE_64_BIT;
	gKernelArgs.num_virtual_allocated_ranges = 1;
	gKernelArgs.arch_args.virtual_end = ROUNDUP(KERNEL_LOAD_BASE_64_BIT
		+ gKernelArgs.virtual_allocated_range[0].size, 0x200000);

	// Find the highest physical memory address. We map all physical memory
	// into the kernel address space, so we want to make sure we map everything
	// we have available.
	uint64 maxAddress = 0;
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor *entry = (efi_memory_descriptor *)((addr_t)memory_map + i * descriptor_size);
		maxAddress = std::max(maxAddress,
			entry->PhysicalStart + entry->NumberOfPages * 4096);
	}

	// Want to map at least 4GB; there may be stuff other than usable RAM that
	// could be in the first 4GB of physical address space.
	maxAddress = std::max(maxAddress, (uint64)0x100000000ll);
	maxAddress = ROUNDUP(maxAddress, 0x40000000);

	// Currently only use 1 PDPT (512GB). This will need to change if someone
	// wants to use Haiku on a box with more than 512GB of RAM but that's
	// probably not going to happen any time soon.
	if (maxAddress / 0x40000000 > 512)
		panic("Can't currently support more than 512GB of RAM!");

	// Create page tables for the physical map area. Also map this PDPT
	// temporarily at the bottom of the address space so that we are identity
	// mapped.

	pdpt = (uint64*)mmu_allocate_page();
	memset(pdpt, 0, B_PAGE_SIZE);
	pml4[510] = (addr_t)pdpt | kTableMappingFlags;
	pml4[0] = (addr_t)pdpt | kTableMappingFlags;

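	// Map each 1GB slot of the physical map area with one page directory of
	// 512 2MB large pages.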
	for (uint64 i = 0; i < maxAddress; i += 0x40000000) {
		pageDir = (uint64*)mmu_allocate_page();
		memset(pageDir, 0, B_PAGE_SIZE);
		pdpt[i / 0x40000000] = (addr_t)pageDir | kTableMappingFlags;

		for (uint64 j = 0; j < 0x40000000; j += 0x200000) {
			pageDir[j / 0x200000] = (i + j) | kLargePageMappingFlags;
		}
	}

	// Allocate tables for the kernel mappings.

	pdpt = (uint64*)mmu_allocate_page();
	memset(pdpt, 0, B_PAGE_SIZE);
	pml4[511] = (addr_t)pdpt | kTableMappingFlags;

	pageDir = (uint64*)mmu_allocate_page();
	memset(pageDir, 0, B_PAGE_SIZE);
	pdpt[510] = (addr_t)pageDir | kTableMappingFlags;

	// We can now allocate page tables and duplicate the mappings across from
	// the 32-bit address space to them.
	pageTable = NULL; // shush, compiler.
	for (uint32 i = 0; i < gKernelArgs.virtual_allocated_range[0].size
			/ B_PAGE_SIZE; i++) {
		if ((i % 512) == 0) {
			pageTable = (uint64*)mmu_allocate_page();
			memset(pageTable, 0, B_PAGE_SIZE);
			pageDir[i / 512] = (addr_t)pageTable | kTableMappingFlags;
		}

		// Get the physical address to map.
		void *phys;
		if (platform_kernel_address_to_bootloader_address(KERNEL_LOAD_BASE_64_BIT + (i * B_PAGE_SIZE),
				&phys) != B_OK)
			continue;

		pageTable[i % 512] = (addr_t)phys | kPageMappingFlags;
	}

	return (uint64)pml4;
}

// Called after EFI boot services exit.
// Currently assumes that the memory map is sane: sorted, with no overlapping
// regions.
void
mmu_post_efi_setup(size_t memory_map_size, efi_memory_descriptor *memory_map, size_t descriptor_size, uint32_t descriptor_version)
{
	// Add physical memory to the kernel args and update virtual addresses for EFI regions.
	addr_t addr = (addr_t)memory_map;
	gKernelArgs.num_physical_memory_ranges = 0;
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor *entry = (efi_memory_descriptor *)(addr + i * descriptor_size);
		switch (entry->Type) {
		case EfiLoaderCode:
		case EfiLoaderData:
		case EfiBootServicesCode:
		case EfiBootServicesData:
		case EfiConventionalMemory: {
			// Usable memory.
			// Ignore memory below 1MB and above 512GB.
			uint64_t base = entry->PhysicalStart;
			uint64_t end = entry->PhysicalStart + entry->NumberOfPages * 4096;
			if (base < 0x100000)
				base = 0x100000;
			if (end > (512ull * 1024 * 1024 * 1024))
				end = 512ull * 1024 * 1024 * 1024;
			if (base >= end)
				break;
			uint64_t size = end - base;

			insert_physical_memory_range(base, size);
			// LoaderData memory is bootloader allocated memory, possibly
			// containing the kernel or loaded drivers.
			if (entry->Type == EfiLoaderData)
				insert_physical_allocated_range(base, size);
			break;
		}
		case EfiACPIReclaimMemory:
			// ACPI reclaim -- physical memory we could actually use later
			gKernelArgs.ignored_physical_memory += entry->NumberOfPages * 4096;
			break;
		case EfiRuntimeServicesCode:
		case EfiRuntimeServicesData:
			entry->VirtualStart = entry->PhysicalStart;
			break;
		}
	}

	// Sort the address ranges.
	sort_address_ranges(gKernelArgs.physical_memory_range,
		gKernelArgs.num_physical_memory_ranges);
	sort_address_ranges(gKernelArgs.physical_allocated_range,
		gKernelArgs.num_physical_allocated_ranges);
	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	// Switch EFI to virtual mode, using the kernel pmap.
	// Something involving ConvertPointer might need to be done after this?
	// http://wiki.phoenix.com/wiki/index.php/EFI_RUNTIME_SERVICES#SetVirtualAddressMap.28.29
	kRuntimeServices->SetVirtualAddressMap(memory_map_size, descriptor_size, descriptor_version, memory_map);

	// Important.  Make sure supervisor threads can fault on read-only pages...
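	// This sets CR0 to PG | WP | NE | PE; the WP bit (bit 16) is what makes
	// writes to read-only pages fault even in supervisor mode.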
	#if defined(__x86_64__) || defined(__x86__)
	asm("mov %%rax, %%cr0" : : "a" ((1 << 31) | (1 << 16) | (1 << 5) | 1));
	#else
	#error Ensure supervisor threads can fault on read-only pages on this architecture!
	#endif
}

// Platform allocator.
// The bootloader assumes that bootloader address space == kernel address space.
// This is not true until just before the kernel is booted, so an ugly hack is
// used to cover the difference. platform_allocate_region allocates addresses
// in bootloader space, but can convert them to kernel space. The ELF loader
// accesses kernel memory via Mao(), and much later in the boot process,
// addresses in the kernel argument struct are converted from bootloader
// addresses to kernel addresses.
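//
// A typical sequence, as an illustrative sketch only (someSize is a placeholder):
//
//	void *buffer = NULL;
//	if (platform_allocate_region(&buffer, someSize, 0, false) == B_OK) {
//		// buffer now holds a bootloader (physical) address
//		uint64_t kernelAddress;
//		platform_bootloader_address_to_kernel_address(buffer, &kernelAddress);
//	}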

extern "C" status_t
platform_allocate_region(void **_address, size_t size, uint8 /* protection */, bool exactAddress)
{
	// We don't have any control over the page tables, so give up right away if
	// an exactAddress is wanted.
	if (exactAddress)
		return B_NO_MEMORY;

	efi_physical_addr addr;
	size_t aligned_size = ROUNDUP(size, B_PAGE_SIZE);
	allocated_memory_region *region = new(std::nothrow) allocated_memory_region;

	if (region == NULL)
		return B_NO_MEMORY;

	efi_status status = kBootServices->AllocatePages(AllocateAnyPages,
		EfiLoaderData, aligned_size / B_PAGE_SIZE, &addr);
	if (status != EFI_SUCCESS) {
		delete region;
		return B_NO_MEMORY;
	}

	// Addresses above 512GB not supported.
	// Memory map regions above 512GB can be ignored, but if EFI returns pages
	// above that there's nothing that can be done to fix it.
	if (addr + size > (512ull * 1024 * 1024 * 1024))
		panic("Can't currently support more than 512GB of RAM!");

	region->next = allocated_memory_regions;
	allocated_memory_regions = region;
	region->vaddr = 0;
	region->paddr = addr;
	region->size = size;
	region->released = false;

	if (*_address != NULL) {
		region->vaddr = (uint64_t)*_address;
	}

	//dprintf("Allocated region %#lx (requested %p) %#lx %lu\n", region->vaddr, *_address, region->paddr, region->size);

	*_address = (void *)region->paddr;

	return B_OK;
}

/*!
	Neither \a physicalAddress nor \a size need to be aligned, but the function
	will map all pages the range intersects with.
	If \a physicalAddress is not page-aligned, the returned virtual address will
	have the same "misalignment".
*/
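// Note: in this loader all physical memory is identity-mapped, so the address
// returned is simply the physical address; the range is only recorded so the
// kernel knows it is in use.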
extern "C" addr_t
mmu_map_physical_memory(addr_t physicalAddress, size_t size, uint32 flags)
{
	addr_t pageOffset = physicalAddress & (B_PAGE_SIZE - 1);

	physicalAddress -= pageOffset;
	size += pageOffset;

	if (insert_physical_allocated_range(physicalAddress, ROUNDUP(size, B_PAGE_SIZE)) != B_OK)
		return B_NO_MEMORY;

	return physicalAddress + pageOffset;
}

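// Mark a region returned by platform_allocate_region() as released. The pages
// are not returned to EFI here; the matching entry is only flagged.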
extern "C" void
mmu_free(void *virtualAddress, size_t size)
{
	addr_t physicalAddress = (addr_t)virtualAddress;
	addr_t pageOffset = physicalAddress & (B_PAGE_SIZE - 1);

	physicalAddress -= pageOffset;
	size += pageOffset;

	size_t aligned_size = ROUNDUP(size, B_PAGE_SIZE);

	for (allocated_memory_region *region = allocated_memory_regions; region; region = region->next) {
		if (region->paddr == physicalAddress && region->size == aligned_size) {
			region->released = true;
			return;
		}
	}
}

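// Find the tracked allocation that exactly matches the given physical address
// and size.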
static allocated_memory_region *
get_region(void *address, size_t size)
{
	for (allocated_memory_region *region = allocated_memory_regions; region; region = region->next) {
		if (region->paddr == (uint64_t)address && region->size == size) {
			return region;
		}
	}
	return NULL;
}

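// Turn the physical ranges collected in gKernelArgs.physical_allocated_range
// into tracked regions, so that platform_bootloader_address_to_kernel_address()
// can assign them kernel virtual addresses. The kernel args ranges are cleared
// in the process.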
static void
convert_physical_ranges()
{
	addr_range *range = gKernelArgs.physical_allocated_range;
	uint32 num_ranges = gKernelArgs.num_physical_allocated_ranges;

	for (uint32 i = 0; i < num_ranges; ++i) {
		allocated_memory_region *region = new(std::nothrow) allocated_memory_region;

		if (!region)
			panic("Couldn't add allocated region");

		// Addresses above 512GB not supported.
		// Memory map regions above 512GB can be ignored, but if EFI returns pages above
		// that there's nothing that can be done to fix it.
		if (range[i].start + range[i].size > (512ull * 1024 * 1024 * 1024))
			panic("Can't currently support more than 512GB of RAM!");

		region->next = allocated_memory_regions;
		allocated_memory_regions = region;
		region->vaddr = 0;
		region->paddr = range[i].start;
		region->size = range[i].size;
		region->released = false;

		// Clear out the allocated range
		range[i].start = 0;
		range[i].size = 0;
		gKernelArgs.num_physical_allocated_ranges--;
	}
}

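// Convert a bootloader (physical) address into the kernel virtual address it
// will be visible at once the kernel's page tables are in use. Virtual
// addresses are handed out lazily, on first conversion of a region.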
extern "C" status_t
platform_bootloader_address_to_kernel_address(void *address, uint64_t *_result)
{
	// Convert any physical ranges prior to looking up the address.
	convert_physical_ranges();

	uint64_t addr = (uint64_t)address;

	for (allocated_memory_region *region = allocated_memory_regions; region; region = region->next) {
		if (region->paddr <= addr && addr < region->paddr + region->size) {
			// Lazily allocate virtual memory.
			if (region->vaddr == 0) {
				region->vaddr = next_virtual_address;
				next_virtual_address += ROUNDUP(region->size, B_PAGE_SIZE);
			}
			*_result = region->vaddr + (addr - region->paddr);
			//dprintf("Converted bootloader address %p in region %#lx-%#lx to %#lx\n",
			//	address, region->paddr, region->paddr + region->size, *_result);
			return B_OK;
		}
	}

	return B_ERROR;
}

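// The reverse conversion: translate a kernel virtual address back to the
// bootloader (physical) address it corresponds to.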
extern "C" status_t
platform_kernel_address_to_bootloader_address(uint64_t address, void **_result)
{
	for (allocated_memory_region *region = allocated_memory_regions; region; region = region->next) {
		if (region->vaddr != 0 && region->vaddr <= address && address < region->vaddr + region->size) {
			*_result = (void *)(region->paddr + (address - region->vaddr));
			//dprintf("Converted kernel address %#lx in region %#lx-%#lx to %p\n",
			//	address, region->vaddr, region->vaddr + region->size, *_result);
			return B_OK;
		}
	}

	return B_ERROR;
}

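// Return the pages backing a region allocated with platform_allocate_region()
// to EFI. The region itself stays in the tracking list.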
extern "C" status_t
platform_free_region(void *address, size_t size)
{
	//dprintf("Release region %p %lu\n", address, size);
	allocated_memory_region *region = get_region(address, size);
	if (!region)
		panic("Unknown region??");

	kBootServices->FreePages((efi_physical_addr)address, ROUNDUP(size, B_PAGE_SIZE) / B_PAGE_SIZE);

	return B_OK;
}