xref: /haiku/src/system/boot/platform/efi/arch/arm64/arch_mmu.cpp (revision 388d91a7b829b91b95abd2505437d431c468ce7d)
/*
 * Copyright 2019-2022 Haiku, Inc. All rights reserved.
 * Released under the terms of the MIT License.
 */

#include <boot/platform.h>
#include <boot/stage2.h>

#include "mmu.h"
#include "efi_platform.h"

#include "aarch64.h"
#include "arch_mmu.h"

// #define TRACE_MMU
#ifdef TRACE_MMU
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif


//#define TRACE_MEMORY_MAP
//#define TRACE_PAGE_DIRECTORY


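// Translation regime used by the loader: 4KB granule, 48-bit virtual
// addresses, i.e. a four-level (L0..L3) walk. The three flags per level are
// assumed to describe whether block, table and page descriptors are allowed
// at that level, matching the BlocksAllowed()/TablesAllowed()/PagesAllowed()
// queries used by map_region() below.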
ARMv8TranslationRegime::TranslationDescriptor translation4Kb48bits = {
	{L0_SHIFT, L0_ADDR_MASK, false, true, false },
	{L1_SHIFT, Ln_ADDR_MASK, true, true,  false },
	{L2_SHIFT, Ln_ADDR_MASK, true, true,  false },
	{L3_SHIFT, Ln_ADDR_MASK, false, false, true }
};


ARMv8TranslationRegime CurrentRegime(translation4Kb48bits);
/* ARM port */
static uint64_t* sPageDirectory = NULL;
// static uint64_t* sFirstPageTable = NULL;
static uint64_t* sNextPageTable = NULL;
// static uint64_t* sLastPageTable = NULL;


const char*
granule_type_str(int tg)
{
	switch (tg) {
		case TG_4KB:
			return "4KB";
		case TG_16KB:
			return "16KB";
		case TG_64KB:
			return "64KB";
		default:
			return "Invalid Granule";
	}
}


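// Dumps, via TRACE(), every valid descriptor of the translation table rooted
// at 'table', recursing into next-level tables as they are found. Only
// produces output when TRACE_MMU is defined.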
void
arch_mmu_dump_table(uint64* table, uint8 currentLevel)
{
	ARMv8TranslationTableDescriptor ttd(table);

	if (currentLevel >= CurrentRegime.MaxLevels()) {
		// This should not happen
		panic("Too many levels ...");
		return;
	}

	uint64 EntriesPerLevel = arch_mmu_entries_per_granularity(CurrentRegime.Granularity());
	for (uint i = 0; i < EntriesPerLevel; i++) {
		if (!ttd.IsInvalid()) {
			TRACE("Level %d, @%0lx: TTD %016lx\t", currentLevel, ttd.Location(), ttd.Value());
			if (ttd.IsTable() && currentLevel < 3) {
				TRACE("Table! Next Level:\n");
				arch_mmu_dump_table(ttd.Dereference(), currentLevel + 1);
			}
			if (ttd.IsBlock() || (ttd.IsPage() && currentLevel == 3)) {
				TRACE("Block/Page");

				if (i & 1) { // 2 entries per row
					TRACE("\n");
				} else {
					TRACE("\t");
				}
			}
		}
		ttd.Next();
	}
}


#ifdef TRACE_PAGE_DIRECTORY
void
arch_mmu_dump_present_tables()
{
	uint64 address = arch_mmu_base_register();
	dprintf("Under TTBR0: %lx\n", address);

	arch_mmu_dump_table(reinterpret_cast<uint64*>(address), 0);

	/* We are about to transition to EL1 but are still running in EL2: the MMU
	 * configuration currently in use is the one installed through TTBR0_EL2.
	 * The kernel-side tables are not active yet; they are allocated under
	 * sPageDirectory and will be installed in TTBR1_EL1.
	 */
	dprintf("Under allocated TTBR1_EL1:\n");
	arch_mmu_dump_table(sPageDirectory, 0);
}
#endif


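// Prepares TCR_EL1 for the kernel half of the address space: TTBR1 walks are
// enabled and T1SZ is programmed, where T1SZ = 64 - (kernel VA bits).
// Counting the set bits of KERNEL_BASE works because the base address has
// exactly its upper (64 - VA bits) bits set; for example (assuming a base
// with the top 16 bits set) the popcount is 16, yielding a 48-bit kernel
// address space.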
void
arch_mmu_setup_EL1(uint64 tcr)
{
	// Enable TTBR1
	tcr &= ~TCR_EPD1_DISABLE;

	// Set the size of the kernel address space
	tcr &= ~T1SZ_MASK; // Clear
	// TODO: Compiler dependency?
	tcr |= TCR_T1SZ(__builtin_popcountl(KERNEL_BASE));

	WRITE_SPECIALREG(TCR_EL1, tcr);
}


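// Recursively maps size bytes starting at phys_addr to virt_addr within the
// table referenced by 'descriptor' at the given level. When the remaining
// size and the physical alignment allow it, a block (or, at the last level, a
// page) descriptor is written directly; otherwise a next-level table is
// allocated or followed. With the 4KB/48-bit regime an entry spans 512GiB at
// L0, 1GiB at L1, 2MiB at L2 and 4KiB at L3. Returns the number of bytes that
// spilled past the end of this table and must be handled by the caller
// (0 when everything was consumed).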
uint64
map_region(addr_t virt_addr, addr_t phys_addr, size_t size,
	uint32_t level, uint64_t flags, uint64* descriptor)
{
	ARMv8TranslationTableDescriptor ttd(descriptor);

	if (level >= CurrentRegime.MaxLevels()) {
		panic("Too many levels at mapping\n");
	}

	uint64 currentLevelSize = CurrentRegime.EntrySize(level);

	ttd.JumpTo(CurrentRegime.DescriptorIndex(virt_addr, level));

	uint64 remainingSizeInTable = CurrentRegime.TableSize(level)
		- currentLevelSize * CurrentRegime.DescriptorIndex(virt_addr, level);

	TRACE("Level %x, Processing desc %lx indexing %lx\n",
		level, reinterpret_cast<uint64>(descriptor), ttd.Location());

	if (ttd.IsInvalid()) {
		// If the physical address has the same alignment we can make a block
		// here instead of using a complete next-level table.
		if (size >= currentLevelSize && CurrentRegime.Aligned(phys_addr, level)) {
			// Set it as block or page
			if (CurrentRegime.BlocksAllowed(level)) {
				ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
			} else {
				// Most likely in Level 3...
				ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
			}

			// Expand!
			int64 expandedSize = (size > remainingSizeInTable)
				? remainingSizeInTable : size;

			do {
				phys_addr += currentLevelSize;
				expandedSize -= currentLevelSize;
				if (expandedSize > 0) {
					ttd.Next();
					if (CurrentRegime.BlocksAllowed(level)) {
						ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
					} else {
						// Most likely in Level 3...
						ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
					}
				}
			} while (expandedSize > 0);

			return (size > remainingSizeInTable) ? (size - remainingSizeInTable) : 0;
		} else {
			// Set it to next level
			uint64 offset = 0;
			uint64 remainingSize = size;
			do {
				uint64* page = NULL;
				if (ttd.IsInvalid()) {
					// Our region is too small: we need to create a level below.
					page = CurrentRegime.AllocatePage();
					ttd.SetToTable(page, flags);
				} else if (ttd.IsTable()) {
					// Next table is allocated, follow it
					page = ttd.Dereference();
				} else {
					panic("Required contiguous descriptor in use by Block/Page"
						" for %lx\n", ttd.Location());
				}

				uint64 unprocessedSize = map_region(virt_addr + offset,
					phys_addr + offset, remainingSize, level + 1, flags, page);

				offset = remainingSize - unprocessedSize;

				remainingSize = unprocessedSize;

				ttd.Next();
			} while (remainingSize > 0);

			return 0;
		}
	} else {
		if ((ttd.IsBlock() && CurrentRegime.BlocksAllowed(level))
			|| (ttd.IsPage() && CurrentRegime.PagesAllowed(level))) {
			// TODO: Review, overlap? expand?
			panic("Re-setting a Block/Page descriptor for %lx\n", ttd.Location());
			return 0;
		} else if (ttd.IsTable() && CurrentRegime.TablesAllowed(level)) {
			// Next Level
			map_region(virt_addr, phys_addr, size, level + 1, flags,
				ttd.Dereference());
			return 0;
		} else {
			panic("All descriptor types processed for %lx\n", ttd.Location());
			return 0;
		}
	}
}


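// Maps a physical range at the given virtual address, picking the kernel
// (TTBR1_EL1) or the low/user (TTBR0_EL1) root table based on the address,
// and records the result as an allocated virtual range in gKernelArgs.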
static void
map_range(addr_t virt_addr, phys_addr_t phys_addr, size_t size, uint64_t flags)
{
	TRACE("map 0x%0lx --> 0x%0lx, len=0x%0lx, flags=0x%0lx\n",
		(uint64_t)virt_addr, (uint64_t)phys_addr, (uint64_t)size, flags);

	// TODO: Review why we get ranges with 0 size ...
	if (size == 0) {
		TRACE("Requesting 0 size map\n");
		return;
	}

	// TODO: Review this case
	if (phys_addr == READ_SPECIALREG(TTBR1_EL1)) {
		TRACE("Trying to map the TTBR itself?!\n");
		return;
	}

	if (arch_mmu_read_access(virt_addr) && arch_mmu_read_access(virt_addr + size)) {
		TRACE("Range already covered in current MMU\n");
		return;
	}

	uint64 address;

	if (arch_mmu_is_kernel_address(virt_addr)) {
		// Use TTBR1
		address = READ_SPECIALREG(TTBR1_EL1);
	} else {
		// Otherwise use TTBR0
		address = READ_SPECIALREG(TTBR0_EL1);
	}

	map_region(virt_addr, phys_addr, size, 0, flags,
		reinterpret_cast<uint64*>(address));

// 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
// 		map_page(virt_addr + offset, phys_addr + offset, flags);
// 	}

	ASSERT_ALWAYS(insert_virtual_allocated_range(virt_addr, size) >= B_OK);
}


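// Builds gKernelArgs.physical_memory_range from the EFI memory map in two
// passes: the first inserts the usable ranges (boot services and conventional
// memory) and fixes up VirtualStart for loader and runtime ranges, the second
// removes any reserved range that may overlap, accounting the difference as
// ignored physical memory.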
static void
build_physical_memory_list(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	addr_t addr = (addr_t)memory_map;

	gKernelArgs.num_physical_memory_ranges = 0;

	// First scan: Add all usable ranges
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry = (efi_memory_descriptor*)(addr + i * descriptor_size);
		switch (entry->Type) {
		case EfiLoaderCode:
		case EfiLoaderData:
			entry->VirtualStart = entry->PhysicalStart;
			break;
		case EfiBootServicesCode:
		case EfiBootServicesData:
		case EfiConventionalMemory: {
			// Usable memory.
			uint64_t base = entry->PhysicalStart;
			uint64_t size = entry->NumberOfPages * B_PAGE_SIZE;
			insert_physical_memory_range(base, size);
			break;
		}
		case EfiACPIReclaimMemory:
			// ACPI reclaim -- physical memory we could actually use later
			break;
		case EfiRuntimeServicesCode:
		case EfiRuntimeServicesData:
			entry->VirtualStart = entry->PhysicalStart;
			break;
		case EfiMemoryMappedIO:
			entry->VirtualStart = entry->PhysicalStart;
			break;
		}
	}

	uint64_t initialPhysicalMemory = total_physical_memory();

	// Second scan: Remove everything reserved that may overlap
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry = (efi_memory_descriptor*)(addr + i * descriptor_size);
		switch (entry->Type) {
		case EfiLoaderCode:
		case EfiLoaderData:
		case EfiBootServicesCode:
		case EfiBootServicesData:
		case EfiConventionalMemory:
			break;
		default:
			uint64_t base = entry->PhysicalStart;
			uint64_t size = entry->NumberOfPages * B_PAGE_SIZE;
			remove_physical_memory_range(base, size);
		}
	}

	gKernelArgs.ignored_physical_memory
		+= initialPhysicalMemory - total_physical_memory();

	sort_address_ranges(gKernelArgs.physical_memory_range,
		gKernelArgs.num_physical_memory_ranges);
}


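// Records the ranges occupied by the loader itself (EfiLoaderData) as
// allocated physical memory, so the kernel will not hand them out again.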
static void
build_physical_allocated_list(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	addr_t addr = (addr_t)memory_map;
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry = (efi_memory_descriptor*)(addr + i * descriptor_size);
		switch (entry->Type) {
		case EfiLoaderData: {
			uint64_t base = entry->PhysicalStart;
			uint64_t size = entry->NumberOfPages * B_PAGE_SIZE;
			insert_physical_allocated_range(base, size);
			break;
		}
		default:
			;
		}
	}

	sort_address_ranges(gKernelArgs.physical_allocated_range,
		gKernelArgs.num_physical_allocated_ranges);
}


void
arch_mmu_init()
{
	// Stub
}


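// Called once the memory map is final: completes the physical allocation list
// and hands the map back to the firmware via SetVirtualAddressMap(), so that
// EFI runtime services stay usable after the switch to the kernel mappings.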
void
arch_mmu_post_efi_setup(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	build_physical_allocated_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version);

	// Switch EFI to virtual mode, using the kernel pmap.
	kRuntimeServices->SetVirtualAddressMap(memory_map_size, descriptor_size,
		descriptor_version, memory_map);

#ifdef TRACE_MEMORY_MAP
	dprintf("phys memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
		uint64 start = gKernelArgs.physical_memory_range[i].start;
		uint64 size = gKernelArgs.physical_memory_range[i].size;
		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}

	dprintf("allocated phys memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
		uint64 start = gKernelArgs.physical_allocated_range[i].start;
		uint64 size = gKernelArgs.physical_allocated_range[i].size;
		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}

	dprintf("allocated virt memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
		uint64 start = gKernelArgs.virtual_allocated_range[i].start;
		uint64 size = gKernelArgs.virtual_allocated_range[i].size;
		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}

	dprintf("virt memory ranges to keep:\n");
	for (uint32_t i = 0; i < gKernelArgs.arch_args.num_virtual_ranges_to_keep; i++) {
		uint64 start = gKernelArgs.arch_args.virtual_ranges_to_keep[i].start;
		uint64 size = gKernelArgs.arch_args.virtual_ranges_to_keep[i].size;
		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}
#endif
}


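// Allocates (or reuses) the root table for the kernel side of the address
// space and installs it in TTBR1_EL1. An existing TTBR1_EL1 value is only
// reused when the loader is already running at EL1; when running at EL2 any
// leftover EL1 tables are ignored and a fresh page is allocated.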
void
arch_mmu_allocate_kernel_page_tables(void)
{
	uint64* page = NULL;
	uint64 ttbr1 = READ_SPECIALREG(TTBR1_EL1);

	// Trust possible previous allocations of TTBR1
	// only if we come from a preset EL1 context
	if (ttbr1 != 0ll) {
		if (arch_exception_level() == 1) {
			page = reinterpret_cast<uint64*>(ttbr1);
			TRACE("Reusing present TTBR1_EL1: %" B_PRIx64 "\n", ttbr1);
		} else if (arch_exception_level() == 2) {
			TRACE("Ignoring EL1 TTBR1 (%" B_PRIx64 ") tables\n", ttbr1);
		}
	}

	// NOTE: On devices supporting multiple translation base registers, TTBR0 must
	// be used solely.
	if (page == NULL) {
		page = CurrentRegime.AllocatePage();
		if (page != NULL) {
			WRITE_SPECIALREG(TTBR1_EL1, page);
		} else {
			panic("Not enough memory for kernel initial page\n");
		}
	}

	sPageDirectory = page;
}


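// Builds the kernel page tables from the final EFI memory map: allocates the
// TTBR1 root, collects the physical memory list, maps the loader and
// EFI_MEMORY_RUNTIME ranges, the loader's pending "next" regions, the
// physical map window at KERNEL_PMAP_BASE and, if configured, the boot UART,
// then exports the table addresses through gKernelArgs.arch_args.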
uint32_t
arch_mmu_generate_post_efi_page_tables(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	addr_t memory_map_addr = (addr_t)memory_map;

	MemoryAttributeIndirection currentMair;

// 	arch_mmu_allocate_page_tables();
	arch_mmu_allocate_kernel_page_tables();

	build_physical_memory_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version);

	TRACE("Mapping Code & Data\n");

	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry
			= (efi_memory_descriptor*)(memory_map_addr + i * descriptor_size);
		switch (entry->Type) {
		case EfiLoaderCode:
		case EfiLoaderData:
			map_range(entry->VirtualStart, entry->PhysicalStart,
				entry->NumberOfPages * B_PAGE_SIZE,
				ARMv8TranslationTableDescriptor::DefaultCodeAttribute
				| currentMair.MaskOf(MAIR_NORMAL_WB));
			break;
		default:
			;
		}
	}

	TRACE("Mapping EFI_MEMORY_RUNTIME\n");
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry
			= (efi_memory_descriptor*)(memory_map_addr + i * descriptor_size);
		if ((entry->Attribute & EFI_MEMORY_RUNTIME) != 0)
			map_range(entry->VirtualStart, entry->PhysicalStart,
				entry->NumberOfPages * B_PAGE_SIZE,
				ARMv8TranslationTableDescriptor::DefaultCodeAttribute
				| currentMair.MaskOf(MAIR_NORMAL_WB));
	}

	TRACE("Mapping \"next\" regions\n");
	void* cookie = NULL;
	addr_t vaddr;
	phys_addr_t paddr;
	size_t size;
	while (mmu_next_region(&cookie, &vaddr, &paddr, &size)) {
		map_range(vaddr, paddr, size,
			ARMv8TranslationTableDescriptor::DefaultCodeAttribute
			| currentMair.MaskOf(MAIR_NORMAL_WB));
	}

	// TODO: We should only map actual physical RAM here; mapping the whole
	// range could cause unwanted MMIO accesses or bus errors on real hardware.
	map_range(KERNEL_PMAP_BASE, 0, KERNEL_PMAP_SIZE - 1,
		ARMv8TranslationTableDescriptor::DefaultCodeAttribute
		| currentMair.MaskOf(MAIR_NORMAL_WB));

	if (gKernelArgs.arch_args.uart.kind[0] != 0) {
		// Map the UART, as we want to use it during early boot.
		uint64 regs_start = gKernelArgs.arch_args.uart.regs.start;
		uint64 regs_size = ROUNDUP(gKernelArgs.arch_args.uart.regs.size, B_PAGE_SIZE);
		uint64 base = get_next_virtual_address(regs_size);

		map_range(base, regs_start, regs_size,
			ARMv8TranslationTableDescriptor::DefaultPeripheralAttribute
			| currentMair.MaskOf(MAIR_DEVICE_nGnRnE));

		gKernelArgs.arch_args.uart.regs.start = base;
	}

	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	addr_t vir_pgdir;
	platform_bootloader_address_to_kernel_address((void*)sPageDirectory, &vir_pgdir);

	gKernelArgs.arch_args.phys_pgdir = (uint64)sPageDirectory;
	gKernelArgs.arch_args.vir_pgdir = (uint64)vir_pgdir;
	gKernelArgs.arch_args.next_pagetable = (uint64)sNextPageTable - (uint64)sPageDirectory;

	TRACE("gKernelArgs.arch_args.phys_pgdir     = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.phys_pgdir);
	TRACE("gKernelArgs.arch_args.vir_pgdir      = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.vir_pgdir);
	TRACE("gKernelArgs.arch_args.next_pagetable = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.next_pagetable);

#ifdef TRACE_PAGE_DIRECTORY
	arch_mmu_dump_present_tables();
#endif

	return (uint64_t)sPageDirectory;
}
543