xref: /haiku/src/system/boot/platform/efi/arch/x86/arch_mmu.cpp (revision 9e25244c5e9051f6cd333820d6332397361abd6c)
/*
 * Copyright 2021-2022 Haiku, Inc. All rights reserved.
 * Released under the terms of the MIT License.
 */


#include <algorithm>

#include <kernel.h>
#include <arch_kernel.h>
#include <arch/cpu.h>
#include <arch/x86/descriptors.h>
#include <boot/platform.h>
#include <boot/stage2.h>
#include <efi/types.h>
#include <efi/boot-services.h>

#include "efi_platform.h"
#include "mmu.h"


//#define TRACE_MMU
#ifdef TRACE_MMU
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif


//#define TRACE_MEMORY_MAP
//#define TRACE_PAGE_DIRECTORY

#define VADDR_TO_PDENT(va)		(((va) / B_PAGE_SIZE) / 1024)
#define VADDR_TO_PTENT(va)		(((va) / B_PAGE_SIZE) % 1024)
#define X86_PDE_ADDRESS_MASK	0xfffff000
#define X86_PTE_ADDRESS_MASK	0xfffff000
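// With 32-bit non-PAE paging, a virtual address splits into a 10-bit page
// directory index, a 10-bit page table index and a 12-bit page offset:
// e.g. 0xc0801234 gives directory index 0x302 (0xc0801234 >> 22), table
// index 0x001 ((0xc0801234 >> 12) & 0x3ff) and offset 0x234.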

#define ALIGN_PAGEDIR			B_PAGE_SIZE


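// Descriptor for the boot GDT: a 16-bit limit followed by a 32-bit linear
// base address, matching the in-memory operand that lgdt expects.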
struct gdt_idt_descr {
	uint16_t	limit;
	uint32_t	base;
} _PACKED;


gdt_idt_descr gBootGDTDescriptor;
segment_descriptor *gBootGDT = NULL;


static const uint32_t kDefaultPageTableFlags = 0x07;      // present, user, R/W


static uint32_t *sPageDirectory = NULL;


#ifdef TRACE_PAGE_DIRECTORY
static void
dump_page_dir(void)
{
	dprintf("=== Page Directory ===\n");
	for (uint32_t i = 0; i < 1024; i++) {
		uint32_t directoryEntry = sPageDirectory[i];
		if (directoryEntry != 0) {
			dprintf("virt 0x%08x --> page table 0x%08x type 0x%08x\n",
				i << 22, directoryEntry & X86_PDE_ADDRESS_MASK,
				directoryEntry & (~X86_PDE_ADDRESS_MASK));
			uint32_t *pageTable = (uint32_t *)(directoryEntry & X86_PDE_ADDRESS_MASK);
			for (uint32_t j = 0; j < 1024; j++) {
				uint32_t tableEntry = pageTable[j];
				if (tableEntry != 0) {
					dprintf("virt 0x%08x     --> page 0x%08x type+flags 0x%08x\n",
						(i << 22) | (j << 12),
						tableEntry & X86_PTE_ADDRESS_MASK,
						tableEntry & (~X86_PTE_ADDRESS_MASK));
				}
			}
		}
	}
}
#endif


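// Allocates a zeroed page for use as a page table. While EFI boot services
// are still active the loader is expected to run with physical == virtual
// addressing, so the page can be written through its physical address and
// the same value can be stored directly into a directory entry.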
static uint32_t *
get_next_page_table(void)
{
	uint32_t *pageTable = (uint32_t *)mmu_allocate_page();
	memset(pageTable, 0, B_PAGE_SIZE);
	return pageTable;
}


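// Builds the boot GDT with flat 4 GB kernel and user code/data segments and
// fills in gBootGDTDescriptor with the table's kernel virtual address, ready
// to be loaded once the kernel's address space is switched in.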
static void
arch_mmu_init_gdt(void)
{
	if (platform_allocate_region((void **)&gBootGDT,
			BOOT_GDT_SEGMENT_COUNT * sizeof(segment_descriptor), 0, false) != B_OK) {
		panic("Failed to allocate GDT.\n");
	}

	STATIC_ASSERT(BOOT_GDT_SEGMENT_COUNT > KERNEL_CODE_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > KERNEL_DATA_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > USER_CODE_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > USER_DATA_SEGMENT);

	// set up a new GDT

	// put standard segment descriptors in GDT
	clear_segment_descriptor(&gBootGDT[0]);

	// seg 0x08 - kernel 4GB code
	set_segment_descriptor(&gBootGDT[KERNEL_CODE_SEGMENT], 0, 0xffffffff,
		DT_CODE_READABLE, DPL_KERNEL);

	// seg 0x10 - kernel 4GB data
	set_segment_descriptor(&gBootGDT[KERNEL_DATA_SEGMENT], 0, 0xffffffff,
		DT_DATA_WRITEABLE, DPL_KERNEL);

	// seg 0x1b - ring 3 user 4GB code
	set_segment_descriptor(&gBootGDT[USER_CODE_SEGMENT], 0, 0xffffffff,
		DT_CODE_READABLE, DPL_USER);

	// seg 0x23 - ring 3 user 4GB data
	set_segment_descriptor(&gBootGDT[USER_DATA_SEGMENT], 0, 0xffffffff,
		DT_DATA_WRITEABLE, DPL_USER);

	addr_t virtualGDT;
	platform_bootloader_address_to_kernel_address(gBootGDT, &virtualGDT);

	// the GDTR limit is the size of the table minus one
	gBootGDTDescriptor.limit = BOOT_GDT_SEGMENT_COUNT * sizeof(segment_descriptor) - 1;
	gBootGDTDescriptor.base = (uint32_t)virtualGDT;

	TRACE("gdt phys 0x%08x virt 0x%08" B_PRIxADDR " desc 0x%08x\n",
		(uint32_t)gBootGDT, virtualGDT,
		(uint32_t)&gBootGDTDescriptor);
	TRACE("gdt limit=%d base=0x%08x\n",
		gBootGDTDescriptor.limit, gBootGDTDescriptor.base);
}


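// Maps a single page: looks up (or lazily allocates) the page table for
// virtAddr's directory slot and writes the page-aligned physAddr plus the
// given flags into the corresponding table entry.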
static void
map_page(addr_t virtAddr, phys_addr_t physAddr, uint32_t flags)
{
	physAddr &= ~(B_PAGE_SIZE - 1);

	uint32_t *pageTable = NULL;
	uint32_t pageDirectoryIndex = VADDR_TO_PDENT(virtAddr);
	uint32_t pageDirectoryEntry = sPageDirectory[pageDirectoryIndex];

	if (pageDirectoryEntry == 0) {
		//TRACE("get next page table for address 0x%08" B_PRIxADDR "\n",
		//	virtAddr);
		pageTable = get_next_page_table();
		sPageDirectory[pageDirectoryIndex] = (uint32_t)pageTable | kDefaultPageTableFlags;
	} else {
		pageTable = (uint32_t *)(pageDirectoryEntry & X86_PDE_ADDRESS_MASK);
	}

	uint32_t pageTableIndex = VADDR_TO_PTENT(virtAddr);
	pageTable[pageTableIndex] = physAddr | flags;
}


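// Maps a contiguous physical range page by page and, for kernel addresses,
// records it in the kernel args so the VM knows the region is already in use.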
static void
map_range(addr_t virtAddr, phys_addr_t physAddr, size_t size, uint32_t flags)
{
	//TRACE("map 0x%08" B_PRIxADDR " --> 0x%08" B_PRIxPHYSADDR
	//	", len=0x%08" B_PRIxSIZE ", flags=0x%08" PRIx32 "\n",
	//	virtAddr, physAddr, size, flags);

	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
		map_page(virtAddr + offset, physAddr + offset, flags);
	}

	if (virtAddr >= KERNEL_LOAD_BASE)
		ASSERT_ALWAYS(insert_virtual_allocated_range(virtAddr, size) >= B_OK);
}


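// Builds gKernelArgs.physical_memory_range from the EFI memory map in two
// passes: first every usable range (clamped to the 1 MB to 512 GB window) is
// added, then every reserved range is punched out again in case the two
// overlap. Memory dropped by clamping or punching is accounted for in
// ignored_physical_memory.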
static void
build_physical_memory_list(size_t memoryMapSize,
	efi_memory_descriptor *memoryMap, size_t descriptorSize,
	uint32_t descriptorVersion)
{
	addr_t addr = (addr_t)memoryMap;

	gKernelArgs.num_physical_memory_ranges = 0;

	// First scan: Add all usable ranges
	for (size_t i = 0; i < memoryMapSize / descriptorSize; ++i) {
		efi_memory_descriptor* entry = (efi_memory_descriptor *)(addr + i * descriptorSize);
		switch (entry->Type) {
			case EfiLoaderCode:
			case EfiLoaderData:
			case EfiBootServicesCode:
			case EfiBootServicesData:
			case EfiConventionalMemory: {
				// Usable memory.
				// Ignore memory below 1MB and above 512GB.
				uint64_t base = entry->PhysicalStart;
				uint64_t end = entry->PhysicalStart + entry->NumberOfPages * B_PAGE_SIZE;
				uint64_t originalSize = end - base;
				if (base < 0x100000)
					base = 0x100000;
				if (end > (512ull * 1024 * 1024 * 1024))
					end = 512ull * 1024 * 1024 * 1024;

				gKernelArgs.ignored_physical_memory
					+= originalSize - (max_c(end, base) - base);

				if (base >= end)
					break;
				uint64_t size = end - base;

				insert_physical_memory_range(base, size);
				break;
			}
			default:
				break;
		}
	}

	uint64_t initialPhysicalMemory = total_physical_memory();

	// Second scan: Remove everything reserved that may overlap
	for (size_t i = 0; i < memoryMapSize / descriptorSize; ++i) {
		efi_memory_descriptor* entry = (efi_memory_descriptor *)(addr + i * descriptorSize);
		switch (entry->Type) {
			case EfiLoaderCode:
			case EfiLoaderData:
			case EfiBootServicesCode:
			case EfiBootServicesData:
			case EfiConventionalMemory:
				break;
			default:
				uint64_t base = entry->PhysicalStart;
				uint64_t size = entry->NumberOfPages * B_PAGE_SIZE;
				remove_physical_memory_range(base, size);
		}
	}

	gKernelArgs.ignored_physical_memory
		+= initialPhysicalMemory - total_physical_memory();

	sort_address_ranges(gKernelArgs.physical_memory_range,
		gKernelArgs.num_physical_memory_ranges);
}


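// Marks every EfiLoaderData range (the loader's own allocations, which
// include the loaded kernel image) as already-allocated physical memory.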
static void
build_physical_allocated_list(size_t memoryMapSize,
	efi_memory_descriptor *memoryMap, size_t descriptorSize,
	uint32_t descriptorVersion)
{
	addr_t addr = (addr_t)memoryMap;
	for (size_t i = 0; i < memoryMapSize / descriptorSize; ++i) {
		efi_memory_descriptor* entry = (efi_memory_descriptor *)(addr + i * descriptorSize);
		switch (entry->Type) {
			case EfiLoaderData: {
				uint64_t base = entry->PhysicalStart;
				uint64_t size = entry->NumberOfPages * B_PAGE_SIZE;
				insert_physical_allocated_range(base, size);
				break;
			}
			default:
				;
		}
	}

	sort_address_ranges(gKernelArgs.physical_allocated_range,
		gKernelArgs.num_physical_allocated_ranges);
}


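// Called once the loader is done with boot services: records which physical
// ranges the loader has allocated and switches the EFI runtime services over
// to the virtual address map via SetVirtualAddressMap().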
void
arch_mmu_post_efi_setup(size_t memoryMapSize,
	efi_memory_descriptor *memoryMap, size_t descriptorSize,
	uint32_t descriptorVersion)
{
	build_physical_allocated_list(memoryMapSize, memoryMap,
		descriptorSize, descriptorVersion);

	// Switch EFI to virtual mode, using the kernel pmap.
	kRuntimeServices->SetVirtualAddressMap(memoryMapSize, descriptorSize,
		descriptorVersion, memoryMap);

#ifdef TRACE_MEMORY_MAP
	dprintf("phys memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
		uint64 start = gKernelArgs.physical_memory_range[i].start;
		uint64 size = gKernelArgs.physical_memory_range[i].size;
		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}

	dprintf("allocated phys memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
		uint64 start = gKernelArgs.physical_allocated_range[i].start;
		uint64 size = gKernelArgs.physical_allocated_range[i].size;
		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}

	dprintf("allocated virt memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
		uint64 start = gKernelArgs.virtual_allocated_range[i].start;
		uint64 size = gKernelArgs.virtual_allocated_range[i].size;
		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}
#endif
}


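// Allocates the page directory. The region is over-allocated by one page so
// the directory can be rounded up to a page boundary, as CR3 requires.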
static void
arch_mmu_allocate_page_directory(void)
{
	if (platform_allocate_region((void **)&sPageDirectory,
			B_PAGE_SIZE + ALIGN_PAGEDIR, 0, false) != B_OK)
		panic("Failed to allocate page directory.");
	sPageDirectory = (uint32_t *)ROUNDUP((uint32_t)sPageDirectory, ALIGN_PAGEDIR);
	memset(sPageDirectory, 0, B_PAGE_SIZE);

	TRACE("sPageDirectory  = 0x%08x\n", (uint32_t)sPageDirectory);
}


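// Builds the 32-bit page tables the kernel will run on: maps every region the
// loader set up for the kernel, identity-maps the first 1 MB, and installs the
// recursive page-directory entry. Returns the physical address of the page
// directory, which the caller is expected to load into CR3 when it leaves the
// firmware's address space behind.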
uint32_t
arch_mmu_generate_post_efi_page_tables(size_t memoryMapSize,
	efi_memory_descriptor *memoryMap, size_t descriptorSize,
	uint32_t descriptorVersion)
{
	build_physical_memory_list(memoryMapSize, memoryMap,
		descriptorSize, descriptorVersion);

	// TODO: Find out how to map the EFI runtime services. They are not mapped
	// for now because the kernel doesn't use them anyway.
#if 0
	addr_t memoryMapAddr = (addr_t)memoryMap;
	for (size_t i = 0; i < memoryMapSize / descriptorSize; ++i) {
		efi_memory_descriptor* entry =
			(efi_memory_descriptor *)(memoryMapAddr + i * descriptorSize);
		if ((entry->Attribute & EFI_MEMORY_RUNTIME) != 0)
			map_range(entry->VirtualStart, entry->PhysicalStart,
				entry->NumberOfPages * B_PAGE_SIZE,
				kDefaultPageFlags);
	}
#endif

	void* cookie = NULL;
	addr_t vaddr;
	phys_addr_t paddr;
	size_t size;
	while (mmu_next_region(&cookie, &vaddr, &paddr, &size)) {
		map_range(vaddr, paddr, size,
			kDefaultPageFlags);
	}

	// identity mapping for the first 1 MB
	map_range((addr_t)0, (phys_addr_t)0, 1024 * 1024, kDefaultPageFlags);

	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	// Map the page directory into kernel space at 0xffc00000-0xffffffff.
	// This enables an MMU trick where the 4 MB region that this pgdir entry
	// represents now maps the 4 MB of potential page tables that the pgdir
	// points to. Thrown away later in VM bringup, but useful for now.
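	// With the self-referencing entry in place, the page table for directory
	// index i becomes visible at 0xffc00000 + (i << 12), and the directory
	// itself at 0xfffff000.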
	sPageDirectory[1023] = (uint32_t)sPageDirectory | kDefaultPageFlags;

	addr_t virtPageDirectory;
	platform_bootloader_address_to_kernel_address((void*)sPageDirectory, &virtPageDirectory);

	gKernelArgs.arch_args.phys_pgdir = (uint32_t)sPageDirectory;
	gKernelArgs.arch_args.vir_pgdir = (uint32_t)virtPageDirectory;
	gKernelArgs.arch_args.page_hole = 0xffc00000;
	gKernelArgs.arch_args.virtual_end
		= gKernelArgs.virtual_allocated_range[gKernelArgs.num_virtual_allocated_ranges-1].start
		+ gKernelArgs.virtual_allocated_range[gKernelArgs.num_virtual_allocated_ranges-1].size;

	TRACE("gKernelArgs.arch_args.phys_pgdir  = 0x%08" B_PRIx32 "\n",
		gKernelArgs.arch_args.phys_pgdir);
	TRACE("gKernelArgs.arch_args.vir_pgdir   = 0x%08" B_PRIx64 "\n",
		gKernelArgs.arch_args.vir_pgdir);
	TRACE("gKernelArgs.arch_args.page_hole   = 0x%08" B_PRIx64 "\n",
		gKernelArgs.arch_args.page_hole);
	TRACE("gKernelArgs.arch_args.virtual_end = 0x%08" B_PRIx64 "\n",
		gKernelArgs.arch_args.virtual_end);

#ifdef TRACE_PAGE_DIRECTORY
	dump_page_dir();
#endif

	return (uint32_t)sPageDirectory;
}


void
arch_mmu_init(void)
{
	arch_mmu_allocate_page_directory();
	arch_mmu_init_gdt();
}