xref: /haiku/src/system/boot/platform/efi/arch/arm64/arch_mmu.cpp (revision 6f80a9801fedbe7355c4360bd204ba746ec3ec2d)
/*
 * Copyright 2019-2022 Haiku, Inc. All rights reserved.
 * Released under the terms of the MIT License.
 */

#include <boot/platform.h>
#include <boot/stage2.h>

#include "mmu.h"
#include "efi_platform.h"

#include "aarch64.h"
#include "arch_mmu.h"

// #define TRACE_MMU
#ifdef TRACE_MMU
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif


ARMv8TranslationRegime::TranslationDescriptor translation4Kb48bits = {
	{L0_SHIFT, L0_ADDR_MASK, false, true, false },
	{L1_SHIFT, Ln_ADDR_MASK, true, true,  false },
	{L2_SHIFT, Ln_ADDR_MASK, true, true,  false },
	{L3_SHIFT, Ln_ADDR_MASK, false, false, true }
};


ARMv8TranslationRegime CurrentRegime(translation4Kb48bits);
/* ARM port */
static uint64_t* sPageDirectory = NULL;
// static uint64_t* sFirstPageTable = NULL;
static uint64_t* sNextPageTable = NULL;
// static uint64_t* sLastPageTable = NULL;


const char*
granule_type_str(int tg)
{
	switch (tg) {
		case TG_4KB:
			return "4KB";
		case TG_16KB:
			return "16KB";
		case TG_64KB:
			return "64KB";
		default:
			return "Invalid Granule";
	}
}


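// Walk the translation table at 'table' for the given level and dump every
// valid descriptor, recursing into next-level tables. Output is only
// produced when TRACE_MMU is defined.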
void
arch_mmu_dump_table(uint64* table, uint8 currentLevel)
{
	ARMv8TranslationTableDescriptor ttd(table);

	if (currentLevel >= CurrentRegime.MaxLevels()) {
		// This should not happen
		panic("Too many levels ...");
		return;
	}

	uint64 EntriesPerLevel = arch_mmu_entries_per_granularity(CurrentRegime.Granularity());
	for (uint i = 0; i < EntriesPerLevel; i++) {
		if (!ttd.IsInvalid()) {
			TRACE(("Level %d, @%0lx: TTD %016lx\t", currentLevel, ttd.Location(), ttd.Value()));
			if (ttd.IsTable() && currentLevel < 3) {
				TRACE(("Table! Next Level:\n"));
				arch_mmu_dump_table(ttd.Dereference(), currentLevel + 1);
			}
			if (ttd.IsBlock() || (ttd.IsPage() && currentLevel == 3)) {
				TRACE(("Block/Page"));

				if (i & 1) { // 2 entries per row
					TRACE(("\n"));
				} else {
					TRACE(("\t"));
				}
			}
		}
		ttd.Next();
	}
}


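// Dump the tables the MMU is currently using (under TTBR0) as well as the
// kernel tables allocated under sPageDirectory. Compiled to a no-op unless
// TRACE_MMU is defined.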
void
arch_mmu_dump_present_tables()
{
#ifdef TRACE_MMU
	if (arch_mmu_enabled()) {
		uint64 address = arch_mmu_base_register();
		TRACE(("Under TTBR0: %lx\n", address));

		arch_mmu_dump_table(reinterpret_cast<uint64*>(address), 0);

		/* We may still be running in EL2, in which case the MMU configuration
		 * used by the loader is the one referenced by TTBR0_EL2. The kernel
		 * tables are not active yet: they are only allocated under
		 * sPageDirectory and will be installed in TTBR1_EL1.
		 */
		if (address != 0ul) {
			TRACE(("Under allocated TTBR1_EL1:\n"));
			arch_mmu_dump_table(sPageDirectory, 0);
		}
	}
#endif
}


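// Prepare TCR_EL1 for the kernel: enable translation table walks through
// TTBR1 and set the size of the upper (kernel) virtual address space.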
void
arch_mmu_setup_EL1(uint64 tcr)
{
	// Enable TTBR1
	tcr &= ~TCR_EPD1_DISABLE;

	// Set the size of the kernel address space
	tcr &= ~T1SZ_MASK; // Clear
	// TODO: Compiler dependency?
	tcr |= TCR_T1SZ(__builtin_popcountl(KERNEL_BASE));

	WRITE_SPECIALREG(TCR_EL1, tcr);
}


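// Map 'size' bytes at the given translation table level, starting from the
// table at 'descriptor'. Creates block, page or next-level table descriptors
// as needed and recurses into lower levels. Returns the number of bytes that
// did not fit into this table and still has to be mapped by the caller.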
uint64
map_region(addr_t virt_addr, addr_t phys_addr, size_t size,
	uint32_t level, uint64_t flags, uint64* descriptor)
{
	ARMv8TranslationTableDescriptor ttd(descriptor);

	if (level >= CurrentRegime.MaxLevels()) {
		panic("Too many levels at mapping\n");
	}

	uint64 currentLevelSize = CurrentRegime.EntrySize(level);

	ttd.JumpTo(CurrentRegime.DescriptorIndex(virt_addr, level));

	uint64 remainingSizeInTable = CurrentRegime.TableSize(level)
		- currentLevelSize * CurrentRegime.DescriptorIndex(virt_addr, level);

	TRACE(("Level %x, Processing desc %lx indexing %lx\n",
		level, reinterpret_cast<uint64>(descriptor), ttd.Location()));

	if (ttd.IsInvalid()) {
		// If the physical address has the same alignment, we can make a block
		// here instead of using a complete next-level table
		if (size >= currentLevelSize && CurrentRegime.Aligned(phys_addr, level)) {
			// Set it as block or page
			if (CurrentRegime.BlocksAllowed(level)) {
				ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
			} else {
				// Most likely in Level 3...
				ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
			}

			// Expand!
			int64 expandedSize = (size > remainingSizeInTable)
				? remainingSizeInTable : size;

			do {
				phys_addr += currentLevelSize;
				expandedSize -= currentLevelSize;
				if (expandedSize > 0) {
					ttd.Next();
					if (CurrentRegime.BlocksAllowed(level)) {
						ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
					} else {
						// Most likely in Level 3...
						ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
					}
				}
			} while (expandedSize > 0);

			return (size > remainingSizeInTable) ? (size - remainingSizeInTable) : 0;
		} else {
			// Set it to next level
			uint64 offset = 0;
			uint64 remainingSize = size;
			do {
				uint64* page = NULL;
				if (ttd.IsInvalid()) {
					// Our region is too small: we need to create a level below
					page = CurrentRegime.AllocatePage();
					ttd.SetToTable(page, flags);
				} else if (ttd.IsTable()) {
					// Next table is allocated, follow it
					page = ttd.Dereference();
				} else {
					panic("Required contiguous descriptor in use by Block/Page for %lx\n",
						ttd.Location());
				}

				uint64 unprocessedSize = map_region(virt_addr + offset,
					phys_addr + offset, remainingSize, level + 1, flags, page);

				offset = remainingSize - unprocessedSize;

				remainingSize = unprocessedSize;

				ttd.Next();
			} while (remainingSize > 0);

			return 0;
		}
	} else {
		if ((ttd.IsBlock() && CurrentRegime.BlocksAllowed(level))
			|| (ttd.IsPage() && CurrentRegime.PagesAllowed(level))) {
			// TODO: Review, overlap? expand?
			panic("Re-setting a Block/Page descriptor for %lx\n", ttd.Location());
			return 0;
		} else if (ttd.IsTable() && CurrentRegime.TablesAllowed(level)) {
			// Next Level
			map_region(virt_addr, phys_addr, size, level + 1, flags, ttd.Dereference());
			return 0;
		} else {
			panic("All descriptor types processed for %lx\n", ttd.Location());
			return 0;
		}
	}
}


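// Map the virtual range [virt_addr, virt_addr + size) to the physical range
// starting at phys_addr with the given attributes, using the TTBR1 tables
// for kernel addresses and the TTBR0 tables otherwise, and record the range
// as allocated virtual address space.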
static void
map_range(addr_t virt_addr, phys_addr_t phys_addr, size_t size, uint64_t flags)
{
	TRACE(("map 0x%0lx --> 0x%0lx, len=0x%0lx, flags=0x%0lx\n",
		(uint64_t)virt_addr, (uint64_t)phys_addr, (uint64_t)size, flags));

	// TODO: Review why we get ranges with 0 size ...
	if (size == 0) {
		TRACE(("Requesting 0 size map\n"));
		return;
	}

	// TODO: Review this case
	if (phys_addr == READ_SPECIALREG(TTBR1_EL1)) {
		TRACE(("Trying to map the TTBR itself?!\n"));
		return;
	}

	if (arch_mmu_read_access(virt_addr) && arch_mmu_read_access(virt_addr + size)) {
		TRACE(("Range already covered in current MMU\n"));
		return;
	}

	uint64 address;

	if (arch_mmu_is_kernel_address(virt_addr)) {
		// Use TTBR1
		address = READ_SPECIALREG(TTBR1_EL1);
	} else {
		// Otherwise use TTBR0
		address = READ_SPECIALREG(TTBR0_EL1);
	}

	map_region(virt_addr, phys_addr, size, 0, flags, reinterpret_cast<uint64*>(address));

// 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
// 		map_page(virt_addr + offset, phys_addr + offset, flags);
// 	}

	ASSERT_ALWAYS(insert_virtual_allocated_range(virt_addr, size) >= B_OK);
}


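// Build gKernelArgs' physical memory range list from the EFI memory map:
// first insert every usable range, then carve out all reserved ranges that
// may overlap with it.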
static void
build_physical_memory_list(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	addr_t addr = (addr_t)memory_map;

	gKernelArgs.num_physical_memory_ranges = 0;

	// First scan: Add all usable ranges
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry = (efi_memory_descriptor*)(addr + i * descriptor_size);
		switch (entry->Type) {
		case EfiLoaderCode:
		case EfiLoaderData:
			entry->VirtualStart = entry->PhysicalStart;
			break;
		case EfiBootServicesCode:
		case EfiBootServicesData:
		case EfiConventionalMemory: {
			// Usable memory.
			uint64_t base = entry->PhysicalStart;
			uint64_t size = entry->NumberOfPages * B_PAGE_SIZE;
			insert_physical_memory_range(base, size);
			break;
		}
		case EfiACPIReclaimMemory:
			// ACPI reclaim -- physical memory we could actually use later
			break;
		case EfiRuntimeServicesCode:
		case EfiRuntimeServicesData:
			entry->VirtualStart = entry->PhysicalStart;
			break;
		case EfiMemoryMappedIO:
			entry->VirtualStart = entry->PhysicalStart;
			break;
		}
	}

	uint64_t initialPhysicalMemory = total_physical_memory();

	// Second scan: Remove everything reserved that may overlap
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry = (efi_memory_descriptor*)(addr + i * descriptor_size);
		switch (entry->Type) {
		case EfiLoaderCode:
		case EfiLoaderData:
		case EfiBootServicesCode:
		case EfiBootServicesData:
		case EfiConventionalMemory:
			break;
		default:
			uint64_t base = entry->PhysicalStart;
			uint64_t size = entry->NumberOfPages * B_PAGE_SIZE;
			remove_physical_memory_range(base, size);
		}
	}

	gKernelArgs.ignored_physical_memory
		+= initialPhysicalMemory - total_physical_memory();

	sort_address_ranges(gKernelArgs.physical_memory_range,
		gKernelArgs.num_physical_memory_ranges);
}


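// Record the ranges already allocated by the loader (EfiLoaderData) in
// gKernelArgs' physical allocated range list.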
static void
build_physical_allocated_list(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	addr_t addr = (addr_t)memory_map;
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry = (efi_memory_descriptor*)(addr + i * descriptor_size);
		switch (entry->Type) {
		case EfiLoaderData: {
			uint64_t base = entry->PhysicalStart;
			uint64_t size = entry->NumberOfPages * B_PAGE_SIZE;
			insert_physical_allocated_range(base, size);
			break;
		}
		default:
			;
		}
	}

	sort_address_ranges(gKernelArgs.physical_allocated_range,
		gKernelArgs.num_physical_allocated_ranges);
}


void
arch_mmu_init()
{
	// Stub
}


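// Finish the MMU related work once the EFI memory map is final: record the
// ranges the loader has allocated and switch the EFI runtime services to
// virtual mode using that map.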
void
arch_mmu_post_efi_setup(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	build_physical_allocated_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version);

	// Switch EFI to virtual mode, using the kernel pmap.
	kRuntimeServices->SetVirtualAddressMap(memory_map_size, descriptor_size,
		descriptor_version, memory_map);

	TRACE(("phys memory ranges:\n"));
	for (uint32_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
		uint32_t start = (uint32_t)gKernelArgs.physical_memory_range[i].start;
		uint32_t size = (uint32_t)gKernelArgs.physical_memory_range[i].size;
		TRACE(("    0x%08x-0x%08x, length 0x%08x\n",
			start, start + size, size));
	}

	TRACE(("allocated phys memory ranges:\n"));
	for (uint32_t i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
		uint32_t start = (uint32_t)gKernelArgs.physical_allocated_range[i].start;
		uint32_t size = (uint32_t)gKernelArgs.physical_allocated_range[i].size;
		TRACE(("    0x%08x-0x%08x, length 0x%08x\n",
			start, start + size, size));
	}

	TRACE(("allocated virt memory ranges:\n"));
	for (uint32_t i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
		uint32_t start = (uint32_t)gKernelArgs.virtual_allocated_range[i].start;
		uint32_t size = (uint32_t)gKernelArgs.virtual_allocated_range[i].size;
		TRACE(("    0x%08x-0x%08x, length 0x%08x\n",
			start, start + size, size));
	}
}


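// Set up sPageDirectory as the root table of the kernel address space:
// reuse a TTBR1_EL1 table left by the firmware when already running at EL1,
// otherwise allocate a fresh page and install it in TTBR1_EL1.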
void
arch_mmu_allocate_kernel_page_tables(void)
{
	uint64* page = NULL;
	uint64 ttbr1 = READ_SPECIALREG(TTBR1_EL1);

	// Trust possible previous allocations of TTBR1
	// only if we come from a preset EL1 context
	if (ttbr1 != 0ll) {
		if (arch_exception_level() == 1) {
			page = reinterpret_cast<uint64*>(ttbr1);
			TRACE(("Reusing present TTBR1_EL1: %" B_PRIx64 "\n", ttbr1));
		} else if (arch_exception_level() == 2) {
			TRACE(("Ignoring EL1 TTBR1 (%" B_PRIx64 ") tables\n", ttbr1));
		}
	}

	// NOTE: On devices supporting multiple translation base registers, TTBR0
	// must be used solely.
	if (page == NULL) {
		page = CurrentRegime.AllocatePage();
		if (page != NULL) {
			WRITE_SPECIALREG(TTBR1_EL1, page);
		} else {
			panic("Not enough memory for kernel initial page\n");
		}
	}

	sPageDirectory = page;
}


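// Build the kernel page tables from the final EFI memory map: map the
// loader's code and data, every EFI_MEMORY_RUNTIME region, the regions
// queued through mmu_next_region() and the physical map area, then publish
// the table locations in gKernelArgs. Returns the address of sPageDirectory,
// the kernel root translation table.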
uint32_t
arch_mmu_generate_post_efi_page_tables(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	addr_t memory_map_addr = (addr_t)memory_map;

	MemoryAttributeIndirection currentMair;

// 	arch_mmu_allocate_page_tables();
	arch_mmu_allocate_kernel_page_tables();

	build_physical_memory_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version);

	TRACE(("Mapping Code & Data\n"));

	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry
			= (efi_memory_descriptor*)(memory_map_addr + i * descriptor_size);
		switch (entry->Type) {
		case EfiLoaderCode:
		case EfiLoaderData:
			map_range(entry->VirtualStart, entry->PhysicalStart,
				entry->NumberOfPages * B_PAGE_SIZE,
				ARMv8TranslationTableDescriptor::DefaultCodeAttribute
				| currentMair.MaskOf(MAIR_NORMAL_WB));
			break;
		default:
			;
		}
	}

	TRACE(("Mapping EFI_MEMORY_RUNTIME\n"));
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry
			= (efi_memory_descriptor*)(memory_map_addr + i * descriptor_size);
		if ((entry->Attribute & EFI_MEMORY_RUNTIME) != 0)
			map_range(entry->VirtualStart, entry->PhysicalStart,
				entry->NumberOfPages * B_PAGE_SIZE,
				ARMv8TranslationTableDescriptor::DefaultCodeAttribute
				| currentMair.MaskOf(MAIR_NORMAL_WB));
	}

	TRACE(("Mapping \"next\" regions\n"));
	void* cookie = NULL;
	addr_t vaddr;
	phys_addr_t paddr;
	size_t size;
	while (mmu_next_region(&cookie, &vaddr, &paddr, &size)) {
		map_range(vaddr, paddr, size,
			ARMv8TranslationTableDescriptor::DefaultCodeAttribute
			| currentMair.MaskOf(MAIR_NORMAL_WB));
	}

/*  TODO: Not a UART here... inspect dtb?
	// identity mapping for the debug uart
	map_range(0x09000000, 0x09000000, B_PAGE_SIZE,
		ARMv8TranslationTableDescriptor::DefaultPeripheralAttribute
		| currentMair.MaskOf(MAIR_DEVICE_nGnRnE));
*/

	// TODO: We should actually only map physical RAM here; mapping everything
	// could cause unwanted MMIO accesses or bus errors on real hardware.
	map_range(KERNEL_PMAP_BASE, 0, KERNEL_PMAP_SIZE - 1,
		ARMv8TranslationTableDescriptor::DefaultCodeAttribute
		| currentMair.MaskOf(MAIR_NORMAL_WB));

	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	addr_t vir_pgdir;
	platform_bootloader_address_to_kernel_address((void*)sPageDirectory, &vir_pgdir);

	gKernelArgs.arch_args.phys_pgdir = (uint64)sPageDirectory;
	gKernelArgs.arch_args.vir_pgdir = (uint32)vir_pgdir;
	gKernelArgs.arch_args.next_pagetable = (uint64)(sNextPageTable) - (uint64)sPageDirectory;

	TRACE(("gKernelArgs.arch_args.phys_pgdir     = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.phys_pgdir));
	TRACE(("gKernelArgs.arch_args.vir_pgdir      = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.vir_pgdir));
	TRACE(("gKernelArgs.arch_args.next_pagetable = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.next_pagetable));

	return (uint64_t)sPageDirectory;
}